Using the following program:
--------------------------------------------------
#include <stdio.h>
/*
 * ullong_t: the unsigned integer type used for all generator state.
 * It is "unsigned __int64" when _WIN32 is nonzero and "unsigned long long"
 * otherwise.  The shift counts used below (58, 43) presumably assume the
 * type is 64 bits wide.
 */
#if _WIN32
typedef unsigned __int64 ullong_t;
#else
typedef unsigned long long ullong_t;
#endif

/*
 * Generator state: x and c are updated by MWC, y by XSH, z by CNG.
 * t is scratch space written by MWC; it starts at zero like any static.
 */
static ullong_t x = 1234567890987654321/*ULL*/,
                c = 123456123456123456/*ULL*/,
                y = 362436362436362436/*ULL*/,
                z = 1066149217761810/*ULL*/,
                t;

/*
 * MWC: t = (x << 58) + c, c = x >> 6, x += t; the comparison (x < t)
 * is 1 exactly when that last addition wrapped around, and is added to c.
 * The steps are comma-sequenced, so their order matters.  Yields the new x.
 */
#define MWC     (t = c + (x << 58), c = x >> 6, x = x + t, c = c + (x < t), x)

/*
 * XSH: xors y with itself shifted left by 13, then right by 17, then left
 * by 43, in that order.  Yields the new y.
 */
#define XSH     (y = y ^ (y << 13), y = y ^ (y >> 17), y = y ^ (y << 43))

/*
 * CNG: z = 6906969069 * z + 1234567, with the usual unsigned wraparound.
 * Yields the new z.
 */
#define CNG     (z = 1234567 + z * 6906969069/*LL*/)

/* KISS: advances all three generators once and yields the sum of the results. */
#define KISS    (MWC + XSH + CNG)
/*
 * Evaluates KISS 100000000 times, keeping the most recent value in t, then
 * prints whether that value equals the expected constant.  Returns 0 in
 * both the success and the failure case.
 */
int main(void)
{
    int remaining = 100000000;

    while (remaining-- > 0) {
        t = KISS;
    }

    /* Guard clause: report a mismatch first, then fall through to success. */
    if (t != 1666297717051644203/*ULL*/) {
        printf("Fail");
        return 0;
    }

    printf("100 million uses of KISS OK");
    return 0;
}
---------------------------------------------------
I get the following results:
Pelles-c -Ox	3.712 seconds
lcc-win -O	2.7 seconds
MSVC -Ox	2.55 seconds
gcc -O2		2.0 seconds
The difference between MSVC and lcc-win is that lcc-win uses
2 temporary variables in memory, whereas MSVC uses only registers.
Gcc is in another league... it keeps two 64-bit variables
in registers at the same time... Amazing.
The new version should be available shortly; I uploaded it
tonight.
-- 
jacob navia
jacob at jacob point remcomp point fr
logiciels/informatique
http://www.cs.virginia.edu/~lcc-win32