Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Functions and Global Variables defined in Libraries

55 views
Skip to first unread message

Frederick Virchanza Gotham

unread,
Mar 7, 2023, 6:50:01 AM3/7/23
to

Let's say we have a program that links with a library that exports a global variable and a function. So the library looks like this:

// Library side: defines the global variable and the function that the
// main program refers to.
int lib_global_variable = 0;

// NOTE(review): the nm/readelf listings in this post show the symbol as
// _Z7LibFuncv, i.e. a function named LibFunc - presumably the name was
// shortened to Func when the example was typed up.
void Func(void) { }

The main program has the following declarations:

// Main-program side: declarations only. Neither name is defined in the
// executable, which is why both show up as undefined ('U') symbols.
extern int lib_global_variable;

extern void Func(void);

The program links fine and runs fine if we give the linker "-L. -lname_of_library".

If we use the program "nm" on the main executable and grep for "lib_global_variable" and "Func", we see that both are listed as undefined symbols:

U lib_global_variable
U _Z7LibFuncv

If we use 'readelf' on the main executable and grep for the same two symbols, we see:

000000003fc8 000700000006 R_X86_64_GLOB_DAT 0000000000000000 lib_global_variable + 0
000000004038 000900000007 R_X86_64_JUMP_SLO 0000000000000000 _Z7LibFuncv + 0
7: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND lib_global_variable
9: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND _Z7LibFuncv
39: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND lib_global_variable
41: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND _Z7LibFuncv

I've been doing some testing and tinkering, and I've found that the strategy of using 'dlopen' at runtime to load a library works fine so long as the undefined symbol is listed under R_X86_64_JUMP_SLO (this is how 'readelf' truncates the relocation type R_X86_64_JUMP_SLOT in its output). It doesn't work if the symbol is listed under R_X86_64_GLOB_DAT.

Typically all undefined functions get listed under R_X86_64_JUMP_SLO, and all global variables get listed under R_X86_64_GLOB_DAT, however it is possible to get functions listed under R_X86_64_GLOB_DAT, and my strategy of using 'dlopen' doesn't work if the function is under R_X86_64_GLOB_DAT.

It seems that GNU g++ by default puts the undefined function under R_X86_64_JUMP_SLO, however if you try to use the address of the function at all, for example:

cout << (std::uintptr_t)(void*)LibFunc << endl;

then the function gets moved to R_X86_64_GLOB_DAT, and then my strategy no longer works as 'dlopen' doesn't resolve the unresolved symbol.

So I'd like to ask two questions:

(1) Is the R_X86_64_JUMP_SLO category just for functions, or can we put global variables in there too? Is it possible to get 'dlopen' to resolve global variables?

(2) Is there any way to stop the GNU g++ compiler from putting an undefined function in R_X86_64_GLOB_DAT?

Can anyone suggest a good forum / mailing list where people would know a lot about this stuff?

Öö Tiib

unread,
Mar 7, 2023, 7:46:14 AM3/7/23
to
On Tuesday, 7 March 2023 at 13:50:01 UTC+2, Frederick Virchanza Gotham wrote:
>
> I've been doing some testing and tinkering, and I've found that the strategy of using 'dlopen' at runtime to load a library works fine so long as the undefined symbol is listed under R_X86_64_JUMP_SLO. It doesn't work if the symbol is listed under R_X86_64_GLOB_DAT.
>
Then write an exported getter function that goes to that R_X86_64_JUMP_SLO. Perhaps
the same can be achieved with dlsym().

Frederick Virchanza Gotham

unread,
Mar 8, 2023, 11:34:58 AM3/8/23
to

I've been doing some major tinkering.

The entry point of an executable is '_start'. So, first I wrote a new entry point in x64 assembler that could differentiate between GUI mode and console mode depending upon the value of 'argc':

; This file contains x86_64 assembler for NASM, also known as x64.
; This file contains two functions:
; static void print8bytes(uint64_t eight_chars,uint64_t new_line);
; extern void pre_start(int argc);

section .text

print8bytes:    ; static void print8bytes(uint64_t eight_chars, uint64_t new_line)
    ; Writes the 8 bytes held in r9 to stdout (fd 1) with the Linux 'write'
    ; system call, optionally followed by a single new-line character.
    ; The result of 'write' is ignored.
    ;
    ; Register convention (private to this file, not the System V one):
    ;     r9: the 8-byte string to print, first character in the lowest byte
    ;     r8: if >= 1 (signed compare), also prints a trailing new line
    ;
    ; Every general-purpose register touched here is restored before
    ; returning. That includes rcx and r11: the 'syscall' instruction itself
    ; overwrites them (with the return address and the saved flags), so they
    ; must be saved even though this code never names them otherwise.
    ; The flags register is NOT preserved.

    push rax
    push rcx
    push rdx
    push rsi
    push rdi
    push r11

    ; write(1, &eight_chars, 8)
    push r9                 ; spill the characters so that they have an address
    mov eax, 1              ; system call number 1 = write
    mov edi, 1              ; file descriptor 1 = stdout
    mov rsi, rsp            ; buffer = the 8 bytes just pushed
    mov edx, 8              ; print 8 bytes at a time
    syscall
    pop rsi                 ; discard the 8-byte buffer

    cmp r8, 1               ; check if r8 is true or false
    jl no_new_line

    ; write(1, "\n", 1)
    push qword 0x0a         ; new line, again via the stack
    mov eax, 1
    mov edi, 1
    mov rsi, rsp
    mov edx, 1              ; print just one byte
    syscall
    pop rsi                 ; discard the 1-character buffer

no_new_line:                ; just a jump label - not a function name
    ; restore in exactly the reverse order of the pushes above
    pop r11
    pop rdi
    pop rsi
    pop rdx
    pop rcx
    pop rax
    ret

global pre_start:function
pre_start:
    ; Replacement program entry point (selected at link time with
    ; '-e pre_start'). Prints a one-line banner saying whether the program
    ; was started in GUI mode (argc < 2) or command-line mode, then jumps
    ; to the normal entry point '_start' with the stack and the registers
    ; r8/r9 exactly as they were on entry.
    ;
    ; The 'argc' argument to 'main' is on the top of the stack on entry, so
    ; after 'push rbp' it is found at [rbp+8].
    push rbp
    mov rbp, rsp

    push r9                         ; save because we'll use it - popped back later
    push r8                         ; save because we'll use it - popped back later

    mov r8, 0                       ; false = don't put trailing new line
    mov r9, 0x3d3d3d3d3d3d3d3d      ; "========"
    call print8bytes
    call print8bytes
    call print8bytes

    mov r9, 0x6174735f65727020      ; " pre_sta"
    call print8bytes

    ; Pick the mode text. A named local label is used instead of
    ; hand-counted '$+N' offsets, which silently break if the assembler
    ; chooses a different encoding length for any instruction in between.
    mov r9, 0x495547202d207472      ; "rt - GUI"
    cmp qword[rbp+8], 2             ; check if argc < 2
    jl .mode_chosen                 ; if argc < 2 then we want GUI mode
    mov r9, 0x646d63202d207472      ; "rt - cmd"
.mode_chosen:
    call print8bytes

    mov r9, 0x3d3d3d3d3d3d3d3d      ; "========"
    call print8bytes
    call print8bytes
    mov r8, 1                       ; true = put trailing new line
    call print8bytes

    ; Restore in the reverse order of the pushes: r8 was pushed last, so it
    ; has to be popped first, otherwise r8 and r9 end up swapped.
    pop r8
    pop r9

    mov rsp, rbp
    pop rbp

    extern _start
    jmp _start                      ; continue with the regular start-up code


If you see the last line there, I jump straight into _start. So then I build my program with a new entry point as follows:

g++ -o prog prog.cpp object_file_from_assembler.o -e pre_start

When I run it at the command line, the first thing I get is:

======================== pre_start - GUI========================

and then it continues execution as normal. No problems.

So then the next thing I did was I used 'patchelf' to remove the NEEDED for the graphical user interface library:

patchelf --remove-needed libgtk-3.so.0 ./prog

And then I tried to run it again, but this time around I got back:

./ssh: symbol lookup error: ./ssh: undefined symbol: gtk_true

This means that the program falls over ***before*** the entry point is reached.

So the part of the Linux operating system that loads executable files is not even going into the entry point for my program; it's falling over before then. I need to stop this happening somehow. Perhaps I can put dummy values in the GOT so that the loader doesn't think they're null?

Alf P. Steinbach

unread,
Mar 8, 2023, 12:17:58 PM3/8/23
to
On 2023-03-08 5:34 PM, Frederick Virchanza Gotham wrote:
>
> [snip]
> differentiate between GUI mode and console mode depending upon the value of 'argc':
>
> [snip]
> So the part of the Linux operating system that loads executable files
> is not even going into the entry point for my program
So, you were not talking about Windows' console and GUI subsystems, but
about general application behavior.

Still the solution with two separate executables, perhaps with one
invoking the other, seems very much preferable to mucking about with
entry points and whatever, except if you're doing that for the joy of
hacking?

I believe in Linux the average user will have no problem relating to
separate executables.

---

In Windows you can check the executable's subsystem by inspecting the
bytes of the loaded image of the executable.

The start address of that image is available from `GetModuleHandle(0)`.

Happy hacking! :)

- Alf

Frederick Virchanza Gotham

unread,
Mar 8, 2023, 5:46:55 PM3/8/23
to
On Wednesday, March 8, 2023, Alf P. Steinbach wrote:
>
> In Windows you can check the executable's subsystem by inspecting the
> bytes of the loaded image of the executable.
>
> The start address of that image is available from `GetModuleHandle(0)`.
>
> Happy hacking! :)


Hacking alone won't cut the mustard on this occasion, I need divine inspiration.

...so I went to a life drawing class and wrote poems as the loader trying to resolve symbols:

http://www.virjacode.com/download/hack_no_mustard.jpg

Frederick Virchanza Gotham

unread,
Mar 9, 2023, 6:19:04 AM3/9/23
to
Yesterday, I wrote:
>
> Hacking alone won't cut the mustard on this occasion, I need divine inspiration.


I have this working now. Here's what I did:

Step 1: Link statically with the wxWidgets libraries, and don't link dynamically with the other libraries such as gdk, pango and cairo.

So where you would normally have the following in the Makefile:
$(shell wx-config --libs all)
Replace it with:
$(shell wx-config --libs all | tr ' ' '\n' | grep "\.a$$" | tr '\n' ' ')

Step 2: Allow unresolved symbols in the final executable file, list all symbols, and link lazily:
-Wl,--unresolved-symbols=ignore-in-object-files,--export-dynamic,-z,lazy

Step 3: Write thunks for the two functions whose address is required at runtime (there are only two of them):

// Thunk for gtk_true: gives the executable its own definition of the
// function, so that code which needs the function's address no longer needs
// the GTK library to be present when the program is loaded. The call is
// forwarded to the real gtk_true, located with dlsym(RTLD_NEXT, ...).
//
// The look-up is performed once, on the first call. If the symbol cannot be
// found (for example because the GUI libraries have not been loaded yet),
// an error is printed and the program aborts instead of calling through a
// null pointer.
gboolean gtk_true(void)
{
    using RealFn = gboolean (*)(void);

    static RealFn const p = reinterpret_cast<RealFn>( dlsym(RTLD_NEXT, "gtk_true") );

    if ( nullptr == p )
    {
        std::cerr << "ERROR resolving symbol: gtk_true" << std::endl;
        std::abort();
    }

    return p();
}

// Thunk for gtk_main_do_event: gives the executable its own definition of
// the function, so that code which needs the function's address no longer
// needs the GTK library to be present when the program is loaded. The call
// is forwarded, with its argument unchanged, to the real gtk_main_do_event
// located with dlsym(RTLD_NEXT, ...).
//
// The look-up is performed once, on the first call. If the symbol cannot be
// found (for example because the GUI libraries have not been loaded yet),
// an error is printed and the program aborts instead of calling through a
// null pointer.
void gtk_main_do_event(GdkEvent *const arg)
{
    using RealFn = void (*)(GdkEvent*);

    static RealFn const p = reinterpret_cast<RealFn>( dlsym(RTLD_NEXT, "gtk_main_do_event") );

    if ( nullptr == p )
    {
        std::cerr << "ERROR resolving symbol: gtk_main_do_event" << std::endl;
        std::abort();
    }

    p(arg);
}

Step 4: In main, if you need to use the GUI, then load the GUI libraries:

int main(int argc, char **argv)
{
    // Call chain that leads here:
    //     pre_start -> _start -> __libc_start_main -> main

    // Any command-line argument at all selects console mode.
    bool const console_mode = ( argc >= 2 );

    if ( console_mode )
    {
        cout << "This is the console program :-)" << endl;
        return 0;
    }

    // GUI mode: the GUI libraries are only loaded on this path, just
    // before control is handed over to wxWidgets.
    Load_GUI_Libraries();
    return wxEntry(argc, argv);
}

And finally here's the function for loading the GUI libraries at runtime:

// Names of the shared libraries that Load_GUI_Libraries() passes to dlopen(),
// in the order in which they are loaded. The list is terminated by a
// nullptr sentinel, which is what the loading loop tests for.
// NOTE(review): whether the load order matters is not shown here - keep the
// order as-is unless verified.
char const *const g_strs_dyn_libs[] = {
"libpng16.so.16",
"libfontconfig.so.1",
"libglib-2.0.so.0",
"libgobject-2.0.so.0",
"libpango-1.0.so.0",
"libpangoft2-1.0.so.0",
"libgio-2.0.so.0",
"libgdk_pixbuf-2.0.so.0",
"libcairo.so.2",
"libpangocairo-1.0.so.0",
"libgdk-3.so.0",
"libgtk-3.so.0",
"libSM.so.6",
"libX11.so.6",
nullptr   // end-of-list marker
};

// Loads, at runtime, every shared library named in g_strs_dyn_libs (walked
// up to its nullptr terminator). Each library is opened with
// RTLD_LAZY | RTLD_GLOBAL.
//
// If any library fails to load, the library name and the reason reported by
// dlerror() are written to std::cerr and the program is aborted; the
// function therefore only returns when every library has been loaded.
void Load_GUI_Libraries(void)
{
    using std::cerr; using std::endl;

    for ( char const *const *pp = g_strs_dyn_libs; nullptr != *pp; ++pp )
    {
        if ( nullptr != dlopen(*pp, RTLD_LAZY | RTLD_GLOBAL) ) continue;

        // Fetch the loader's explanation (missing file, unresolved
        // dependency, ...) so the user can tell what actually went wrong.
        char const *const reason = dlerror();

        cerr << "ERROR loading library: " << *pp;
        if ( nullptr != reason ) cerr << " (" << reason << ")";
        cerr << endl;

        std::abort();
    }
}

So now I have an executable file that can run in two modes:
(1) GUI mode - it loads the GUI libraries in 'main'
(2) Console mode - it never loads the GUI libraries

Most importantly - the console mode can run on PCs that don't have the GUI libraries installed.

They're talking about making a movie about my endeavours to get this working, "I Am Legend". I'm not black but Will Smith just wowed them all in the audition so I promptly approved his appointment to play my role.
0 new messages