When one mixes C++ and assembler code one needs to
know a few details about the way the CPU works. In particular,
one would need to know
1) what registers and variables must be left alone
2) which variables and registers might be manipulated
inside the routine if the contents are restored before
the routine terminates
3) what registers and variables one is free to play with at will.
Where can one find information about such details?
I am particularly interested in stuff relating to VS2008.
Rune
I don't use visual studio, but a Google search brought this
up:
http://msdn.microsoft.com/en-us/library/4ks26t93%28VS.80%29.aspx
--Jonathan
This is the implementation of my Window class. I think it makes clear
how you can do this in Win32. Unfortunately, Microsoft has banned
assembler from the 64-bit environment; there you can only
write separate assembler modules:
#include "Window.h"
// Run-time generated window-procedure thunk owned by a Window.
//
// code_ holds a private copy of the machine code of Window::CodeProc in which
// the owning Window pointer has been patched in (see the Thunk constructor).
// The object is reference counted because the Window may be destroyed while
// the thunk's own code is still executing a message.
class Window::Thunk{
    friend class Window;
public:
    Thunk(Window*);
    // Marks the thunk for deferred deletion.
    void SetDestroyed(){ destroyed_ = true; }
    // Adds one reference.
    void AddRef()
    {
        ++ref_;
    }
    // Drops one reference. When the count reaches zero the thunk is either
    // deleted right away (dest == true) or only flagged via destroyed_
    // (dest == false). The flag is what Window::CodeProc checks after the
    // message handler returns, so that the still-executing thunk code is
    // freed by Thunk::Destroy instead of from underneath itself.
    void ReleaseRef(bool dest = false)
    {
        --ref_;
        if(ref_ <= 0)
        {
            if(dest)delete this;
            else destroyed_=true;
        }
    }
    ~Thunk(){ delete[] code_; instances_--;}
private:
    // Naked helper jumped to from the thunk code; deletes the thunk.
    static void Destroy();
    // Non-copyable: declared but intentionally not defined.
    Thunk(const Thunk&);
    Thunk& operator=(const Thunk&);
    BYTE* code_;      // copied, patched machine code (allocated with new BYTE[])
    bool destroyed_;  // set when the thunk must be deleted after the current message
    int ref_;         // reference count, starts at 1 in the constructor
    // Leak detector: the destructor of the single static instance reports
    // any Thunk objects that are still alive at that point.
    class Test{
    public:
        // The literal must stay on one line; a line break inside a string
        // literal does not compile.
        ~Test(){ if (Thunk::instances_)MessageBox(NULL,"THUNK INSTANCES","",MB_OK);}
    };
    static Test test;
    static int instances_;  // number of live Thunk objects
};
// Leak-check object for Thunk: its destructor shows a message box if
// Thunk::instances_ is still non-zero when it is destroyed.
Window::Thunk::Test Window::Thunk::test;
// NOTE(review): Window::Test is declared outside this listing; presumably the
// equivalent leak check for Window::instances_ - confirm in Window.h.
Window::Test Window::test;
// Live-object counters: incremented in the constructors and decremented in
// the destructors of Window and Window::Thunk respectively.
int Window::instances_ = 0;
int Window::Thunk::instances_ = 0;
// Builds a Window object that does not own a native window yet.
//
// A fresh Thunk bound to this object is allocated, and the thunk's generated
// code is stored as the window procedure (wnd_proc_) used later by Create().
// The isDlg argument is not consulted here.
Window::Window(bool isDlg)
    : hwnd_(0),
      orig_proc_(0),
      wnd_class_(0),
      thunk_(0),
      ref_(1)
{
    thunk_ = new Thunk(this);

    // The thunk's code buffer is the entry point the system will call.
    wnd_proc_ = reinterpret_cast<WNDPROC>(thunk_->code_);

    ++instances_;
}
// Tears the window down: restores the previous window procedure, destroys
// the native window if it still exists, unregisters the window class that
// Create() registered (if any) and drops this object's thunk reference.
Window::~Window()
{
    instances_--;
    // The module handle has to be read while the window still exists; once
    // DestroyWindow() has run, the handle can no longer be queried and
    // UnregisterClass would be handed a meaningless instance handle.
    // NOTE(review): when no window exists the handle stays NULL, which is
    // what a failed query would have produced before - confirm against the
    // GetWindowLongPtr wrapper in Window.h.
    HINSTANCE hInstance = NULL;
    if(IsWindow())
    {
        hInstance = (HINSTANCE)GetWindowLongPtr(GWLP_HINSTANCE);
        // Detach the thunk before the window goes away so that no further
        // messages are routed through it.
        if(orig_proc_)
            SetWindowLongPtr(GWLP_WNDPROC,(LONG_PTR)orig_proc_);
        else
            SetWindowLongPtr(GWLP_WNDPROC,(LONG_PTR)DefWindowProc);
        DestroyWindow();
    }
    if(wnd_class_)::UnregisterClass((LPCTSTR)wnd_class_,hInstance);
    // dest == true: delete the thunk now unless a message handler that is
    // still running holds another reference to it.
    if(thunk_)thunk_->ReleaseRef(true);
}
void Window::Create(DWORD dwExStyle,
LPCTSTR lpClassName,
LPCTSTR lpWindowName,
DWORD dwStyle,
int x,
int y,
int nWidth,
int nHeight,
HWND hWndParent,
HMENU hMenu,
HINSTANCE hInstance,
LPVOID lpParam
)
{
if(hwnd_)
{
MessageBox(NULL,"Window already created","",MB_ICONEXCLAMATION | MB_OK);
return;
}
if(!lpClassName && !wnd_class_)
{
WNDCLASSEX wincl = RegisterClass(wnd_proc_);
if (wnd_class_ = ::RegisterClassEx (&wincl),!wnd_class_)
{
DWORD error = GetLastError();
char buf[4096];
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, buf, sizeof
buf,NULL);
throw std::runtime_error(buf);
}
}
hwnd_ = ::CreateWindowEx(dwExStyle,
lpClassName?lpClassName:(LPCTSTR)wnd_class_,
lpWindowName,
dwStyle,
x,
y,
nWidth,
nHeight,
hWndParent,
hMenu,
hInstance,
lpParam);
if (!hwnd_)
{
DWORD error = GetLastError();
char buf[4096];
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, buf, sizeof
buf,NULL);
throw std::runtime_error(buf);
}
WNDPROC proc =
(WNDPROC)SetWindowLongPtr(GWLP_WNDPROC,(LONG_PTR)wnd_proc_);
if (proc != wnd_proc_)orig_proc_ = proc;
}
// Entry point called by the generated thunk code with the owning Window
// prepended to the usual window-procedure arguments.
//
// Records the window handle on the object, holds an extra reference on the
// thunk while the message is dispatched to ProcessMessages(), and returns
// that handler's result. If the handler throws and the thunk has been
// flagged as destroyed in the meantime, the thunk is deleted before the
// exception is rethrown.
// NOTE(review): AutoRef is declared elsewhere; presumably it releases the
// reference when it goes out of scope - confirm.
LRESULT CALLBACK Window::WindowProc(Window* pThis, HWND hwnd, UINT message,
                                    WPARAM wParam, LPARAM lParam)
{
    Thunk* const thunk = pThis->thunk_;

    if(pThis->hwnd_ != hwnd)
    {
        pThis->hwnd_ = hwnd;
    }

    LRESULT result;
    try
    {
        AutoRef<Thunk> guard(thunk);
        guard->AddRef();
        result = pThis->ProcessMessages(hwnd, message, wParam, lParam);
    }
    catch(...)
    {
        if(thunk->destroyed_)
        {
            delete thunk;
        }
        throw;
    }
    return result;
}
// Template for the per-window thunk code. This function has two roles:
//
// 1. Called directly (as the Thunk constructor does, with a plain `call` and
//    no arguments), only the first __asm block runs. It reports the layout
//    of the template and returns:
//      eax = size in bytes of the code between `begin` and `end`
//      ebx = offset of `tag` relative to `begin`
//      ecx = address of `begin`
//    Note that ebx is overwritten without being saved.
//
// 2. The code between `begin` and `end` is what gets copied into each Thunk.
//    In the copy, the 32-bit zero immediate of the `mov [pThis], 0` that ends
//    right before `tag` is overwritten with the owning Window pointer. The
//    copy then acts as a window procedure that forwards to
//    Window::WindowProc(pThis, hwnd, message, wParam, lParam).
//
// The function is naked, so prologue and epilogue are written by hand.
__declspec(naked) LRESULT CALLBACK Window::CodeProc(HWND hwnd,UINT message,WPARAM wParam,LPARAM lParam)
{
    Window* pThis;Thunk* pThunk;
    LRESULT rc;
    __asm
    {
        mov eax, end;
        sub eax, begin; // eax == size
        mov ebx, tag;
        sub ebx, begin; // ebx == tag offset
        mov ecx, begin;
        ret;            // plain return: the layout query pushes no arguments
    }
begin:
    __asm
    {
        // hand-written prologue: set up the frame and room for the locals
        push ebp;
        mov ebp,esp;
        sub esp,__LOCAL_SIZE;
    }
    // Placeholder: the zero immediate is replaced by the Window pointer in
    // every copy of this code.
    __asm mov dword ptr [pThis],0x0;
tag:
    pThunk = pThis->thunk_;
    __asm // rc = WindowProc(pThis,hwnd,message,wParam,lParam);
    {
        push dword ptr[lParam];
        push dword ptr[wParam];
        push dword ptr[message];
        push dword ptr[hwnd];
        push dword ptr[pThis];
        // NOTE(review): the call goes through a register, presumably so that
        // the target stays valid after the code has been copied elsewhere.
        mov edx, dword ptr[Window::WindowProc];
        call edx;
        mov dword ptr[rc], eax;
    }
    if(pThunk->destroyed_)
    {
        // The thunk was flagged for deletion while the message was handled.
        // Tear down this frame and hand over to Thunk::Destroy with
        // eax = result and ecx = thunk pointer.
        __asm
        {
            mov eax, dword ptr[rc];
            mov ecx, dword ptr[pThunk];
            mov esp,ebp;
            pop ebp;
            mov edx, dword ptr[Window::Thunk::Destroy];
            jmp edx; // we don't want Destroy to return since this function will be deleted
        }
    }
    __asm
    {
        mov eax,dword ptr [rc];
        mov esp,ebp;
        pop ebp;
        ret 0x10; // __stdcall stack cleanup
    }
end:;
}
// Builds the executable thunk for pThis.
//
// Asks Window::CodeProc for the layout of its code template, copies the
// template into a freshly allocated buffer, patches the Window pointer into
// the copy and marks the buffer as executable.
//
// Throws std::runtime_error if the buffer cannot be made executable; the
// buffer is released in that case. The live-instance counter is only
// incremented once construction has fully succeeded, because the destructor
// (which decrements it) does not run for a constructor that throws.
Window::Thunk::Thunk(Window* pThis):destroyed_(false),ref_(1)
{
    size_t lsize,lmod,lbegin;
    __asm
    {
        // Layout query: returns size in eax, patch offset in ebx and the
        // start address of the template in ecx.
        call Window::CodeProc;
        mov dword ptr[lsize], eax;
        mov dword ptr[lmod], ebx;
        mov dword ptr[lbegin], ecx;
    }
    code_ = new BYTE[lsize];
    void *pDst = code_;
    __asm //memcpy(pDst,code,lsize);
    {
        mov esi, dword ptr[lbegin];
        mov edi, dword ptr[pDst];
        mov ecx, dword ptr[lsize];
        cld;
        rep movsb;
    }
    __asm // modify mov var,0 to mov var,address
    {
        mov eax, dword ptr[pThis];
        mov ebx, dword ptr[pDst];
        add ebx, dword ptr[lmod];
        // the 32-bit immediate occupies the four bytes just before the tag
        mov dword ptr[ebx-4], eax;
    }
    DWORD oldprotect;
    if(!VirtualProtect(code_, lsize, PAGE_EXECUTE_READWRITE, &oldprotect))
    {
        // Capture the error code before any other call can change it.
        DWORD error = GetLastError();
        char buf[4096];
        const char* text = buf;
        // IGNORE_INSERTS: no argument array is supplied. If no message text
        // is available, do not throw the uninitialised buffer.
        if(!FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                          NULL, error, 0, buf, sizeof buf, NULL))
        {
            text = "VirtualProtect failed (no system message available)";
        }
        // Allocated with new[], so it must be released with delete[].
        delete[] code_;
        throw std::runtime_error(text);
    }
    FlushInstructionCache(GetCurrentProcess(), code_, lsize);
    instances_++;
}
// Deletes a thunk on behalf of its own generated code.
//
// Reached by a jump (not a call) from Window::CodeProc after that code has
// removed its own stack frame. On entry:
//   eax = result to hand back to the caller of the window procedure
//   ecx = pointer to the Thunk to delete
// Because it was entered by a jump, the return address and the four
// window-procedure arguments of the thunk are still on the stack, so this
// function returns directly to the thunk's caller and removes those
// arguments itself.
__declspec(naked) void Window::Thunk::Destroy()
{
Thunk* pThunk;
__asm
{
// hand-written prologue (the function is naked)
push ebp;
mov ebp,esp;
sub esp,__LOCAL_SIZE;
push eax; // save rc
// take the thunk pointer passed in ecx
mov dword ptr[pThunk],ecx
}
delete pThunk;
__asm
{
pop eax; // restore rc
mov esp,ebp;
pop ebp;
ret 0x10; // __stdcall stack cleanup on behalf of the thunk code (4 arguments, 16 bytes)
}
}
Greets, hope this helps...
http://en.wikipedia.org/wiki/Calling_convention
and:
http://en.wikipedia.org/wiki/X86_calling_conventions
these give a general overview and provide a few useful links.
for example:
http://www.agner.org/optimize/calling_conventions.pdf
however, if you are intending to mix C++ and ASM, it is suggested to use the
'extern "C"' modifier for both imported and exported code, as this makes the
job a lot easier.
// Illustration only: the "..." bodies are placeholders.
extern "C" {
void foo() // declared with C linkage: foo does not use name mangling
{
...
}
};
void bar() // default C++ linkage: bar may be name-mangled
{
...
}
particularly of note is 'cdecl' (Win32) and the Win64 calling convention.
if 'extern "C"' is not used, then one has to deal with the C++ ABI, which is
generally compiler specific, and also more complicated (may involve name
mangling, additional "hidden" parameters, concern over things like object
and vtable layout, ... which can be generally avoided by pretending all is
C...).
> Rune
I don't do Win32 programming, but your example makes it reasonably
clear that they did *not* ban assembly; they simply removed the C++
extensions for inline assembly from their compiler. Some would say
that was a wise choice.
/Jorgen
--
// Jorgen Grahn <grahn@ Oo o. . .
\X/ snipabacken.se> O o .
They say they did that because of portability. Actually, assembler
is not the problem there; rather, it is their Win32 API.
Having to write separate assembler modules forces you to write
a lot of assembler, instead of just in the places where the WinAPI
forces you to use assembler anyway ;)
Xlib is better designed than that thing...
Greets
Yes, most of the "interesting" instructions are available as compiler
intrinsics, so you can still use them.
Mixing a few assembly instructions in the middle of a C or C++
function tended to disturb the optimizer so that the surrounding code
was less well optimized. That took away most of the advantage of using
assembly to gain extra speed, so any performance bottlenecks would
generally have to be coded separately anyway.
Bo Persson
In my practice, I often use "GCC-Inline-Assembly-HOWTO" by Sandeep.S and
I am usually satisfied with the generated code. See for example
http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html#ss5.3
for the discussion of how to let the compiler know which exactly
registers are clobbered by an inline assembler fragment.
, so any performance bottlenecks would
> generally have to be coded separately anyway.
In my practice, the required assembler fragments are usually small (most
often, I resort to them to use a particularly beneficial instruction
that compiler would not be able to infer); putting them in separate
included files is sometimes feasible but not to separate compilation
units. At my desired optimization level, the compilers I use perform
global in-lining within a compilation unit (I mean optimizer's
opportunistic inlining which is not limited by C++ functions declared
inline); having assembler snippets in separate units would prevent this
from happening.
>
>
> Bo Persson
>
>
Yes, but the interesting instructions that do not map to C or C++
constructs are available as compiler intrinsics (equivalent to
__builtin_x for gcc). These instructions are known to the optimizer,
and that knowledge is taken advantage of.
http://msdn.microsoft.com/en-us/library/azcs88h2%28VS.100%29.aspx
Therefore the lack of inline assembly for x64 is hardly ever
important.
Bo Persson
It is important, because with __declspec(naked) you could
implement a function in assembler and use C++ as a macro assembler.
The problem is that if you want to write something that you cannot
do in C++, like thunking window procedures, in the 64-bit environment
you have to embed binary code instead of assembly, which is a
step backwards... assembler is easier to read...
You don't want the optimizer to mess with the code in this case anyway...
For example my window remembers pointer in run time generated
thunk (same thing is implemented in WTL, but they use
binary code, instead of assembly)...
This is faster way of implementing it than using global table of
window pointers like mfc does, because there is no lookup
every time proc is called...
Greets