Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Fast trunc() algorithm

98 views
Skip to first unread message

Bonita Montero

unread,
May 29, 2022, 4:14:38 AM5/29/22
to
The trunc() implementation of VC++'s runtime was too slow for me,
so I've written my own:

#include <cstdint>
#include <limits>
#include <cfenv>
#include <cstring>

using namespace std;

double ftrunc( double d )
{
    // Truncate d toward zero (like std::trunc) by clearing the fraction
    // bits of the IEEE-754 bit pattern directly, instead of calling the
    // math library.
    static_assert(sizeof(double) == 8 && std::numeric_limits<double>::is_iec559,
        "double must be IEEE-754");
    // assume size_t reflects the CPU's native register width
    static_assert(sizeof(std::size_t) == 8 || sizeof(std::size_t) == 4,
        "register-width must be 32 or 64 bit");
    if constexpr( sizeof(std::size_t) == 8 )
    {
        // 64-bit registers: operate on the whole bit pattern at once.
        unsigned const
            MANTISSA_BITS = 52,
            EXP_BIAS      = 0x3FF,
            INF_NAN_BASE  = 0x7FF;
        uint64_t const
            EXP_MASK      = (uint64_t)0x7FF << MANTISSA_BITS,
            SIGN_MASK     = (uint64_t)0x800 << MANTISSA_BITS,
            MANTISSA_MASK = 0x000FFFFFFFFFFFFFu,
            NAN_MASK      = 0x0008000000000000u,   // quiet-NaN bit
            // smallest biased exponent with any integral digits (|d| >= 1.0)
            MIN_INTEGRAL_DIGITS_EXP = (uint64_t)EXP_BIAS << MANTISSA_BITS,
            // smallest biased exponent where every mantissa bit is integral
            MIN_INTEGRAL_ONLY_EXP   = (uint64_t)(EXP_BIAS + MANTISSA_BITS) << MANTISSA_BITS,
            INF_NAN_EXP             = (uint64_t)INF_NAN_BASE << MANTISSA_BITS;
        uint64_t dx;
        // memcpy is the only portable type-pun pre-C++20; compilers turn
        // it into a plain register move
        std::memcpy( &dx, &d, 8 );
        auto retBack = [&]() -> double
        {
            std::memcpy( &d, &dx, 8 );
            return d;
        };
        uint64_t exp = dx & EXP_MASK;
        if( exp >= MIN_INTEGRAL_DIGITS_EXP )
        {
            // value has integral digits
            if( exp < MIN_INTEGRAL_ONLY_EXP )
            {
                // some mantissa bits are fractional: clear the low
                // (52 - shift) fraction bits, keep sign/exponent/integral bits.
                // ~(MANTISSA_MASK >> shift) is the well-defined unsigned
                // equivalent of the arithmetic shift 0xFFF0... >> shift.
                unsigned shift = (unsigned)(exp >> MANTISSA_BITS) - EXP_BIAS;
                dx &= ~(MANTISSA_MASK >> shift);
                return retBack();
            }
            if( exp < INF_NAN_EXP )
                // exponent so large that the value is already integral
                return d;
            uint64_t mantissa = dx & MANTISSA_MASK;
            // infinity (mantissa == 0) or quiet NaN: return unchanged
            if( !mantissa || mantissa & NAN_MASK )
                return d;
            // signalling NaN: raise FE_INVALID like the math library does
            std::feraiseexcept( FE_INVALID );
            return d;
        }
        // |d| < 1.0: result is +/-0.0 with the sign of d
        dx &= SIGN_MASK;
        return retBack();
    }
    else
    {
        // 32-bit registers: process the two 32-bit halves separately.
        unsigned const
            MANTISSA_BITS    = 52,
            HI_MANTISSA_BITS = 20,   // mantissa bits residing in the upper dword
            EXP_BIAS         = 0x3FF,
            INF_NAN_BASE     = 0x7FF;
        uint32_t const
            EXP_MASK         = (uint32_t)0x7FFu << HI_MANTISSA_BITS,
            SIGN_MASK        = (uint32_t)0x800u << HI_MANTISSA_BITS,
            HI_MANTISSA_MASK = 0x000FFFFFu,
            NAN_MASK         = 0x00080000u,      // quiet-NaN bit (upper dword)
            MIN_INTEGRAL_DIGITS_EXP = (uint32_t)EXP_BIAS << HI_MANTISSA_BITS,
            // largest exponent whose fraction bits still reach the upper dword
            MAX_INTEGRAL32_EXP      = (uint32_t)(EXP_BIAS + HI_MANTISSA_BITS) << HI_MANTISSA_BITS,
            MIN_INTEGRAL_ONLY_EXP   = (uint32_t)(EXP_BIAS + MANTISSA_BITS) << HI_MANTISSA_BITS,
            INF_NAN_EXP             = (uint32_t)INF_NAN_BASE << HI_MANTISSA_BITS,
            LO_MANTISSA_MASK        = 0xFFFFFFFFu;
        uint64_t dx;
        std::memcpy( &dx, &d, 8 );
        uint32_t
            lo = (uint32_t)dx,
            hi = (uint32_t)(dx >> 32);
        auto retBack = [&]() -> double
        {
            dx = lo | (uint64_t)hi << 32;
            std::memcpy( &d, &dx, 8 );
            return d;
        };
        uint32_t exp = hi & EXP_MASK;
        if( exp >= MIN_INTEGRAL_DIGITS_EXP )
        {
            // value has integral digits
            if( exp < MIN_INTEGRAL_ONLY_EXP )
            {
                // there are fraction digits to mask out
                if( exp <= MAX_INTEGRAL32_EXP )
                {
                    // the fraction digits start in the upper dword: mask
                    // them there and zero the whole lower dword
                    unsigned shift = (unsigned)(exp >> HI_MANTISSA_BITS) - EXP_BIAS;
                    hi &= ~(HI_MANTISSA_MASK >> shift);
                    lo = 0;
                    return retBack();
                }
                // the fraction digits are only in the lower dword: mask them
                unsigned shift = (unsigned)(exp >> HI_MANTISSA_BITS) - EXP_BIAS
                                 - HI_MANTISSA_BITS;
                lo &= ~(LO_MANTISSA_MASK >> shift);
                return retBack();
            }
            if( exp < INF_NAN_EXP )
                // value is already integral
                return retBack();
            uint32_t hiMantissa = hi & HI_MANTISSA_MASK;
            // infinity (whole mantissa == 0) or quiet NaN: return unchanged
            if( !(hiMantissa | lo) || hiMantissa & NAN_MASK )
                return retBack();
            // signalling NaN: raise FE_INVALID like the math library does
            std::feraiseexcept( FE_INVALID );
            return retBack();
        }
        // |d| < 1.0: result is +/-0.0 with the sign of d
        hi &= SIGN_MASK;
        lo = 0;
        return retBack();
    }
}

float ftrunc( float f )
{
    // float overload: truncate toward zero by clearing the fraction bits
    // of the IEEE-754 single-precision bit pattern.
    static_assert(sizeof(float) == 4,
        "sizeof(float) not equal to sizeof(uint32_t)");
    static_assert(std::numeric_limits<float>::is_iec559,
        "float must be IEEE-754");
    unsigned const
        MANTISSA_BITS = 23,
        EXP_BIAS      = 0x7F,
        INF_NAN_BASE  = 0xFF;
    uint32_t const
        EXP_MASK      = (uint32_t)0xFF << MANTISSA_BITS,
        SIGN_MASK     = (uint32_t)0x100 << MANTISSA_BITS,
        MANTISSA_MASK = 0x007FFFFFu,
        NAN_MASK      = 0x00400000u,   // quiet-NaN bit
        // smallest biased exponent with any integral digits (|f| >= 1.0)
        MIN_INTEGRAL_DIGITS_EXP = (uint32_t)EXP_BIAS << MANTISSA_BITS,
        // smallest biased exponent where every mantissa bit is integral
        MIN_INTEGRAL_ONLY_EXP   = (uint32_t)(EXP_BIAS + MANTISSA_BITS) << MANTISSA_BITS,
        INF_NAN_EXP             = (uint32_t)INF_NAN_BASE << MANTISSA_BITS;
    uint32_t fx;
    // memcpy is the only portable type-pun pre-C++20
    std::memcpy( &fx, &f, 4 );
    auto retBack = [&]() -> float
    {
        std::memcpy( &f, &fx, 4 );
        return f;
    };
    uint32_t exp = fx & EXP_MASK;
    if( exp >= MIN_INTEGRAL_DIGITS_EXP )
    {
        // value has integral digits
        if( exp < MIN_INTEGRAL_ONLY_EXP )
        {
            // clear the low (23 - shift) fraction bits of the mantissa;
            // ~(MANTISSA_MASK >> shift) is the well-defined unsigned
            // equivalent of the arithmetic shift 0xFF800000 >> shift
            unsigned shift = (unsigned)(exp >> MANTISSA_BITS) - EXP_BIAS;
            fx &= ~(MANTISSA_MASK >> shift);
            return retBack();
        }
        if( exp < INF_NAN_EXP )
            // exponent so large that the value is already integral
            return retBack();
        uint32_t mantissa = fx & MANTISSA_MASK;
        // infinity (mantissa == 0) or quiet NaN: return unchanged
        if( !mantissa || mantissa & NAN_MASK )
            return retBack();
        // signalling NaN: raise FE_INVALID like the math library does
        std::feraiseexcept( FE_INVALID );
        return retBack();
    }
    // |f| < 1.0: result is +/-0.0 with the sign of f
    fx &= SIGN_MASK;
    return retBack();
}

memcpy() is the only valid way to have writeable aliasing in C++.
memcpy() is normally an intrinsic function and in the above code
the memcpy()s between integral and floating point types are just
moves between general purpose and FPU-registers.

Mut...@dastardlyhq.com

unread,
May 29, 2022, 4:21:16 AM5/29/22
to
On Sun, 29 May 2022 10:14:33 +0200
Bonita Montero <Bonita....@gmail.com> wrote:
>The trunc() implementation of VC++'s runtime was too slow for me,
>so I've written my own:

What do you want, a medal? A scooby snack?

Bonita Montero

unread,
May 29, 2022, 4:29:40 AM5/29/22
to
No I don't have your self esteem problems.

Freethinker

unread,
May 29, 2022, 9:20:51 AM5/29/22
to
Indeed: you have orders of magnitude bigger ones!

Bonita Montero

unread,
May 29, 2022, 9:27:41 AM5/29/22
to
Are there only nerd-idiots around here ?

Freethinker

unread,
May 29, 2022, 9:37:19 AM5/29/22
to
Luckily not. Most (possibly all of them -1) people here are very nice
and very knowledgeable.
They also seem to master their ego problems very well, if they have any.
You should learn a great deal from them.

Bonita Montero

unread,
May 29, 2022, 9:46:56 AM5/29/22
to
Of course, that's while they want to dictate their insecure feeling
(it's all the personality !) when using using namespace std to me.
Complete idiots.

John McCue

unread,
May 29, 2022, 5:33:14 PM5/29/22
to
Trimmed followups to comp.lang.c
Well if I had a scobby snack I would send it to Bonita.

Thanks Bonita, gives me something to play with :)
John

--
[t]csh(1) - "An elegant shell, for a more... civilized age."
- Paraphrasing Star Wars

Christian Gollwitzer

unread,
May 29, 2022, 5:36:40 PM5/29/22
to
Am 29.05.22 um 10:14 schrieb Bonita Montero:
>
> memcpy() is the only valid way to have writeable aliasing in C++.
> memcpy() is normally an intrinsic function and in the above code
> the memcpy()s between integral and floating point types are just
> moves between general purpose and FPU-registers.

How about reinterpret_cast<> ?

Christian

Juha Nieminen

unread,
May 30, 2022, 2:22:25 AM5/30/22
to
Doing type punning, ie. interpreting the bit representation of some type
as a value of an incompatible type, by reinterpret-casting a pointer to
that value, or by using a union, is technically speaking UB in C++
(but allowed in C, I think since C11 or the like), even though in
practice it does work (for the same reason it does in C).
The only "official" way of doing it (prior to C++20) was to use
std::memcpy() (and hope that the compiler will optimize it into
equivalent code as using a union).

C++20 added an official way of doing type punning: std::bit_cast.

David Brown

unread,
May 30, 2022, 2:59:23 AM5/30/22
to
On 30/05/2022 08:22, Juha Nieminen wrote:
> In comp.lang.c++ Christian Gollwitzer <auri...@gmx.de> wrote:
>> Am 29.05.22 um 10:14 schrieb Bonita Montero:
>>>
>>> memcpy() is the only valid way to have writeable aliasing in C++.
>>> memcpy() is normally an intrinsic function and in the above code
>>> the memcpy()s between integral and floating point types are just
>>> moves between general purpose and FPU-registers.
>>
>> How about reinterpret_cast<> ?
>
> Doing type punning, ie. interpreting the bit representation of some type
> as a value of an incompatible type, by reinterpret-casting a pointer to
> that value, or by using a union, is technically speaking UB in C++
> (but allowed in C, I think since C11 or the like), even though in
> practice it does work (for the same reason it does in C).

Using unions for type-punning has, AFAIUI, always been defined behaviour
in C. The wording was changed in C99 to make it clear that it is
allowed, but the way the change was done (as a footnote) indicates that
this was meant as a clarification, not a change to the language. In
C++, however, using unions for type-punning has always been UB. (This
can be viewed as an indication that C++ is more strongly typed than C.)

(You are of course correct that reinterpret_cast can't be used for
accessing one type as an incompatible type.)

> The only "official" way of doing it (prior to C++20) was to use
> std::memcpy() (and hope that the compiler will optimize it into
> equivalent code as using a union).

I believe you could also write your own memcpy version - using char or
unsigned char to access the data, or std::byte in C++17. (You might
want that if you have a poorly optimising compiler that uses a library
call for memcpy even in simple cases.)

>
> C++20 added an official way of doing type punning: std::bit_cast.

Yes.

mut...@dastardlyhq.com

unread,
May 30, 2022, 3:52:30 AM5/30/22
to
On Mon, 30 May 2022 06:22:09 -0000 (UTC)
Juha Nieminen <nos...@thanks.invalid> wrote:
>C++20 added an official way of doing type punning: std::bit_cast.

Haven't heard of that. Useful to know. Thanks.

Paavo Helde

unread,
May 30, 2022, 4:58:52 AM5/30/22
to
30.05.2022 09:22 Juha Nieminen kirjutas:

> Doing type punning, ie. interpreting the bit representation of some type
> as a value of an incompatible type, by reinterpret-casting a pointer to
> that value, or by using a union, is technically speaking UB in C++
> (but allowed in C, I think since C11 or the like), even though in
> practice it does work (for the same reason it does in C).

It might work sometimes, until it doesn't. Here is a counter-example. It
uses std::launder() (which ought to be kind of a null-op, not quite sure
why using it makes a difference).

$ cat test1.cpp
#include <iostream>
#include <new>
int main() {
float x = 3.14;
int y = *std::launder(reinterpret_cast<int*>(&x));
std::cout << y << "\n";
}

$ g++ -std=c++20 -O0 -Wall test1.cpp
$ ./a.exe
1078523331

$ g++ -std=c++20 -O3 -Wall test1.cpp
$ ./a.exe
0

You can see with -O3 it has "optimized" the result to be 0. It has all
the rights to do that because this code contains UB-s.

With memcpy it works as intended also with -O3:

$ cat test1.cpp
#include <iostream>
#include <cstring>
int main() {
float x = 3.14;
int y;
std::memcpy(&y, &x, sizeof(int));
std::cout << y << "\n";
}

$ g++ -std=c++20 -O0 -Wall test1.cpp
$ ./a.exe
1078523331
$ g++ -std=c++20 -O3 -Wall test1.cpp
$ ./a.exe
1078523331



$ g++ -v
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-pc-cygwin/10/lto-wrapper.exe
Target: x86_64-pc-cygwin
Configured with:
/mnt/share/cygpkgs/gcc/gcc.x86_64/src/gcc-10.2.0/configure
--srcdir=/mnt/share/cygpkgs/gcc/gcc.x86_64/src/gcc-10.2.0 --prefix=/usr
--exec-prefix=/usr --localstatedir=/var --sysconfdir=/etc
--docdir=/usr/share/doc/gcc --htmldir=/usr/share/doc/gcc/html -C
--build=x86_64-pc-cygwin --host=x86_64-pc-cygwin
--target=x86_64-pc-cygwin --without-libiconv-prefix
--without-libintl-prefix --libexecdir=/usr/lib
--with-gcc-major-version-only --enable-shared --enable-shared-libgcc
--enable-static --enable-version-specific-runtime-libs
--enable-bootstrap --enable-__cxa_atexit --with-dwarf2
--with-tune=generic --enable-languages=c,c++,fortran,lto,objc,obj-c++
--enable-graphite --enable-threads=posix --enable-libatomic
--enable-libgomp --enable-libquadmath --enable-libquadmath-support
--disable-libssp --enable-libada --disable-symvers --with-gnu-ld
--with-gnu-as --with-cloog-include=/usr/include/cloog-isl
--without-libiconv-prefix --without-libintl-prefix --with-system-zlib
--enable-linker-build-id --with-default-libstdcxx-abi=gcc4-compatible
--enable-libstdcxx-filesystem-ts
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 10.2.0 (GCC)

Bonita Montero

unread,
May 30, 2022, 5:32:28 AM5/30/22
to
reinterpret_cast<int_type &>(floatTyped) didn't work when I tried
this the last time. But for me this isn't a problem since memcpy
works and is intrinsically optimized with MSVC and clang-cl.

Bonita Montero

unread,
May 30, 2022, 5:34:20 AM5/30/22
to
I had a look at the g++ code. g++ is even faster because it simply
does an integer store and load in series and checks for the overflow
flag while storing. If it is set, it chooses another, more complex,
path.
Maybe there are some MSVC-intrinsics that allow this conversion from
float to int and to have a char, signalling the overflow-flag.

Bonita Montero

unread,
May 30, 2022, 11:41:06 PM5/30/22
to
The code can be much simpler and faster:

#include <cstdint>
#include <limits>
#include <cfenv>
#include <cstring>
#include "fmath.h"

using namespace std;

double ftrunc( double d )
{
    // Truncate toward zero. Fast path: whenever |d| is small enough that
    // a double -> int64 conversion cannot overflow, let the hardware
    // truncate; everything else is already integral, infinite or NaN.
    constexpr unsigned
        MANTISSA_BITS = 52,
        EXP_BIAS      = 0x3FF,
        INF_NAN_BASE  = 0x7FF;
    constexpr uint64_t
        EXP_MASK      = (uint64_t)0x7FF << MANTISSA_BITS,
        MANTISSA_MASK = 0x000FFFFFFFFFFFFFu,
        NAN_MASK      = 0x0008000000000000u,   // quiet-NaN bit
        // smallest biased exponent where every mantissa bit is integral
        MIN_PURE_INTEGRAL_EXP = (uint64_t)(EXP_BIAS + MANTISSA_BITS) << MANTISSA_BITS,
        INF_NAN_EXP           = (uint64_t)INF_NAN_BASE << MANTISSA_BITS;
    uint64_t dx;
    std::memcpy( &dx, &d, 8 );
    uint64_t exp = dx & EXP_MASK;
    if( exp < MIN_PURE_INTEGRAL_EXP ) [[likely]]
        // the int64_t round-trip truncates toward zero; copysign restores
        // the sign for -1.0 < d <= -0.0, where the cast yields +0.0
        // (std::trunc(-0.5) must be -0.0)
        return std::copysign( (double)(int64_t)d, d );
    if( exp < INF_NAN_EXP ) [[likely]]
        // exponent so large that the value has no fraction bits
        return d;
    uint64_t mantissa = dx & MANTISSA_MASK;
    // infinity (mantissa == 0) or quiet NaN: return unchanged
    if( !mantissa || mantissa & NAN_MASK ) [[likely]]
        return d;
    // signalling NaN: raise FE_INVALID as the math library would
    std::feraiseexcept( FE_INVALID );
    return d;
}

float ftrunc( float f )
{
    // float overload of the fast truncation: same scheme as the double
    // version, with single-precision field widths.
    static_assert(sizeof(float) == 4,
        "sizeof(float) not equal to sizeof(uint32_t)");
    static_assert(std::numeric_limits<float>::is_iec559,
        "float must be IEEE-754");
    constexpr unsigned
        MANTISSA_BITS = 23,
        EXP_BIAS      = 0x7F,
        INF_NAN_BASE  = 0xFF;
    constexpr uint32_t
        EXP_MASK      = (uint32_t)0xFF << MANTISSA_BITS,
        MANTISSA_MASK = 0x007FFFFFu,
        NAN_MASK      = 0x00400000u,           // quiet-NaN bit
        // smallest biased exponent where every mantissa bit is integral
        MIN_PURE_INTEGRAL_EXP = (uint32_t)(EXP_BIAS + MANTISSA_BITS) << MANTISSA_BITS,
        INF_NAN_EXP           = (uint32_t)INF_NAN_BASE << MANTISSA_BITS;
    uint32_t fx;
    std::memcpy( &fx, &f, 4 );
    uint32_t exp = fx & EXP_MASK;
    if( exp < MIN_PURE_INTEGRAL_EXP )
        // |f| < 2^23 always fits an int64_t; the round-trip truncates
        // toward zero, copysign keeps -0.0 for -1.0 < f <= -0.0
        return std::copysign( (float)(int64_t)f, f );
    if( exp < INF_NAN_EXP )
        // exponent so large that the value has no fraction bits
        return f;
    uint32_t mantissa = fx & MANTISSA_MASK;
    // infinity (mantissa == 0) or quiet NaN: return unchanged
    if( !mantissa || mantissa & NAN_MASK )
        return f;
    // signalling NaN: raise FE_INVALID as the math library would
    std::feraiseexcept( FE_INVALID );
    return f;
}

Bonita Montero

unread,
Jun 1, 2022, 12:08:02 PM6/1/22
to
Unfortunately VC++ calls its own conversion algorithm when converting
from a floating point value to an integer. That makes the before posted
not significantly faster than that of MSVC++ and even slower than that
before.
clang-cl, the mostly MSVC++-compatible version of clang, doesn't do
that but uses the SSE instructions to convert from floating point to
integer directly, and the code runs a lot faster with that. Because
this isn't reliable when you won't switch to clang-cl (the C++20
features are partially a disaster and I have a lot of code that
does compile with current MSVC++ and g++ 11.x) I re-wrote the whole
thing in assembly. Here it is:

; MSVC x64 (MASM) implementations of ftrunc(double) / ftrunc(float).
; Strategy: CVTTSD2SI returns the "integer indefinite" value on
; NaN/infinity/overflow, which routes those cases to a slow path that
; inspects the bit pattern and raises FE_INVALID for signalling NaNs.
PUBLIC ?ftrunc@@YANN@Z          ; mangled: double ftrunc(double)

_TEXT SEGMENT
?ftrunc@@YANN@Z PROC
cvttsd2si rax, xmm0             ; truncating convert; indefinite on NaN/inf/overflow
mov rcx, 08000000000000000h     ; integer-indefinite sentinel
cmp rax, rcx
je complex
cvtsi2sd xmm0, rax              ; common case: convert truncated integer back
ret
complex:
; NOTE(review): rcx still holds the constant 8000000000000000h here, not
; the bit pattern of xmm0 -- shr/and always yields 0, so "jne asIs" is
; always taken. A "movq rcx, xmm0" appears to be missing before this.
shr rcx, 52
and rcx, 07FFh                  ; isolate the 11-bit exponent field
cmp rcx, 07FFh                  ; exponent all-ones => infinity or NaN
jne asIs
; NOTE(review): the mantissa tests below examine rax, which holds the
; conversion RESULT (the indefinite value), not the original double's
; mantissa bits -- confirm intent.
mov rcx, 0FFFFFFFFFFFFFh        ; 52-bit mantissa mask
test rax, rcx
jz asIs                         ; mantissa == 0 => infinity, return as-is
mov rcx, 08000000000000h        ; quiet-NaN bit
test rax, rcx
jnz asIs                        ; quiet NaN, return as-is
; signalling NaN: set the Invalid Operation flag in MXCSR.
; NOTE(review): 32-bit esp in 64-bit code -- presumably should be rsp;
; also Win64 has no red zone, so scratch storage below rsp is unsafe.
stmxcsr dword ptr [esp - 4]
or byte ptr [esp - 4], 1        ; IE (invalid operation) flag
ldmxcsr dword ptr [esp - 4]
ret
asIs:
ret
?ftrunc@@YANN@Z ENDP

; mangled: float ftrunc(float)
; NOTE(review): this PROC uses the DOUBLE-precision conversions
; (cvttsd2si/cvtsi2sd) on a float argument -- presumably should be
; cvttss2si/cvtsi2ss. It also redefines the labels complex/asIs, which
; MASM requires to be unique (or local @@ labels) within the segment.
?ftrunc@@YAMM@Z PROC
cvttsd2si eax, xmm0             ; integer indefinite for int32 is 80000000h
cmp eax, 080000000h
je complex
cvtsi2sd xmm0, eax
ret
complex:
mov ecx, eax                    ; NOTE(review): eax is the conversion result, not the float's bits
shr ecx, 23
; NOTE(review): the float exponent field is 8 bits -- mask and compare
; should presumably use 0FFh, not 07Fh.
and ecx, 07Fh
cmp ecx, 07Fh
jne asIs
test eax, 07FFFFFh              ; 23-bit mantissa mask; zero => infinity
jz asIs
; NOTE(review): the author's follow-up post corrects this constant to
; 0400000h (the single-precision quiet-NaN bit).
test eax, 0800000h
jnz asIs
stmxcsr dword ptr [esp - 4]     ; NOTE(review): esp/red-zone issue as above
or byte ptr [esp - 4], 1
ldmxcsr dword ptr [esp - 4]
ret
asIs:
ret
?ftrunc@@YAMM@Z ENDP
_TEXT ENDS

END

You might think why I post assembly code here.
But that's still valid for MSVC C++ programmers only.

Bonita Montero

unread,
Jun 1, 2022, 12:09:44 PM6/1/22
to
>     test eax, 0800000h
test eax, 0400000h

Bonita Montero

unread,
Jun 1, 2022, 10:43:09 PM6/1/22
to
There's even a simpler way with SSE 4.1:

// Truncate toward zero with the SSE 4.1 ROUNDSD/ROUNDSS instructions.
//
// Fixes vs. the posted version:
//  * _MM_FROUND_FLOOR rounds toward -infinity (floor(-1.5) == -2.0);
//    truncation needs _MM_FROUND_TO_ZERO (trunc(-1.5) == -1.0).
//  * _mm_round_sd/_mm_round_ss RETURN the rounded vector -- they do not
//    modify their arguments, so the result must be assigned.
//  * the float overload returned double; it now returns float like the
//    other ftrunc(float) implementations in this thread.
//  * .m128d_f64 / .m128_f32 are MSVC-only union members; _mm_set_sd /
//    _mm_cvtsd_f64 (and the ss/f32 variants) are portable.
#if defined(__GNUC__) && !defined(__SSE4_1__)
__attribute__(( target( "sse4.1" ) ))   // allow compiling without -msse4.1
#endif
inline double ftrunc( double d )
{
    __m128d mm = _mm_set_sd( d );
    mm = _mm_round_sd( mm, mm, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC );
    return _mm_cvtsd_f64( mm );
}

#if defined(__GNUC__) && !defined(__SSE4_1__)
__attribute__(( target( "sse4.1" ) ))
#endif
inline float ftrunc( float f )
{
    __m128 mm = _mm_set_ss( f );
    mm = _mm_round_ss( mm, mm, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC );
    return _mm_cvtss_f32( mm );
}

With that, a simple double trunc() is 11.6 times faster on my CPU.

jak

unread,
Jun 2, 2022, 1:44:20 AM6/2/22
to
#include <math.h>

double myTrunc(double value)
{
return value - fmod(value, 1.);
}

Bonita Montero

unread,
Jun 2, 2022, 2:14:02 AM6/2/22
to
That solution is 194 times slower with g++ 11.1 and 188 times
slower with clang 13. Seems the compilers don't do any special
optimization if the divisor is 1.0.

jak

unread,
Jun 2, 2022, 5:14:41 AM6/2/22
to
Compared with your solution or with the trunc function of MSVC?
Also, I think you will need to verify that the SSE feature exists
and is enabled on the processor before using it.

Bonita Montero

unread,
Jun 2, 2022, 6:48:35 AM6/2/22
to
This feature came with the first Intel i-series processors.
The generation before were the Core II processors, these are
so ancient and homeopathically used today that I can silently
assume that the code runs.

0 new messages