Fast trunc() algorithm

Bonita Montero

unread,

May 29, 2022, 4:14:25 AM5/29/22

to

The trunc() implementation of VC++'s runtime was too slow for me,
so I've written my own:

#include <cstdint>
#include <limits>
#include <cfenv>
#include <cstring>

using namespace std;

double ftrunc( double d )
{
static_assert(sizeof(double) == 8 && numeric_limits<double>::is_iec559,
"double must be IEEE-754");
// assume size_t is our CPU's native register-width
static_assert(sizeof(size_t) == 8 || sizeof(size_t) == 4,
"register-width must be 32 or 64 bit");
if constexpr( sizeof(size_t) == 8 )
// we have 64 bit registers
{
unsigned const
MANTISSA_BITS = 52,
EXP_BIAS = 0x3FF,
INF_NAN_BASE = 0x7FF;
uint64_t const
EXP_MASK = (uint64_t)0x7FF << MANTISSA_BITS,
SIGN_MASK = (uint64_t)0x800 << MANTISSA_BITS ,
MANTISSA_MASK = 0x000FFFFFFFFFFFFFu,
NAN_MASK = 0x0008000000000000u,
MIN_INTEGRAL_DIGITS_EXP = (uint64_t)EXP_BIAS << MANTISSA_BITS,
MIN_INTEGRAL_ONLY_EXP = (uint64_t)(EXP_BIAS + MANTISSA_BITS) <<
MANTISSA_BITS,
INF_NAN_EXP = (uint64_t)INF_NAN_BASE << MANTISSA_BITS;
int64_t const MANTISSA_SHIFT_MASK = 0xFFF0000000000000u;
uint64_t dx;
memcpy( &dx, &d, 8 );
auto retBack = [&]() -> double
{
memcpy( &d, &dx, 8 );
return d;
};
uint64_t exp = dx & EXP_MASK;
if( exp >= MIN_INTEGRAL_DIGITS_EXP )
// value has integral digits
if( exp < MIN_INTEGRAL_ONLY_EXP )
{
// there are fraction-digits to mask out, mask them
unsigned shift = (unsigned)(exp >> MANTISSA_BITS) - EXP_BIAS;
dx &= MANTISSA_SHIFT_MASK >> shift;
return retBack();
}
else
if( exp < INF_NAN_EXP )
// value is integral
return d;
else
{
uint64_t mantissa = dx & MANTISSA_MASK;
// infinite, NaN: return value
if( !mantissa || mantissa & NAN_MASK )
return retBack();
// SNaN: raise exception on SNaN if necessary
feraiseexcept( FE_INVALID );
return retBack();
}
else
{
// below +/-1.0
// return +/-0.0
dx &= SIGN_MASK;
return retBack();
}
}
else
// we have 32 bit registers
{
unsigned const
MANTISSA_BITS = 52,
HI_MANTISSA_BITS = 20,
EXP_BIAS = 0x3FF,
INF_NAN_BASE = 0x7FF;
uint32_t const
EXP_MASK = (uint32_t)0x7FFu << HI_MANTISSA_BITS,
SIGN_MASK = (uint32_t)0x800u << HI_MANTISSA_BITS,
HI_MANTISSA_MASK = 0x000FFFFFu,
NAN_MASK = 0x00080000u,
MIN_INTEGRAL_DIGITS_EXP = (uint32_t) EXP_BIAS << HI_MANTISSA_BITS,
MAX_INTEGRAL32_EXP = (uint32_t)(EXP_BIAS + HI_MANTISSA_BITS) <<
HI_MANTISSA_BITS,
MIN_INTEGRAL_ONLY_EXP = (uint32_t)(EXP_BIAS + MANTISSA_BITS) <<
HI_MANTISSA_BITS,
INF_NAN_EXP = (uint32_t)INF_NAN_BASE << HI_MANTISSA_BITS,
NEG_LO_MANTISSA_SHIFT_MASK = 0xFFFFFFFFu;
int32_t const HI_MANTISSA_SHIFT_MASK = 0xFFF00000;
uint64_t dx;
memcpy( &dx, &d, 8 );
uint32_t
lo = (uint32_t)dx,
hi = (uint32_t)(dx >> 32);
auto retBack = [&]() -> double
{
dx = lo | (uint64_t)hi << 32;
memcpy( &d, &dx, 8 );
return d;
};
uint32_t exp = hi & EXP_MASK;
if( exp >= MIN_INTEGRAL_DIGITS_EXP )
// value has integral digits
if( exp < MIN_INTEGRAL_ONLY_EXP )
// there are fraction-digits to mask out
if( exp <= MAX_INTEGRAL32_EXP )
{
// the fraction digits are in the upper dword, mask them and zero
the lower dword
unsigned shift = (unsigned)(exp >> HI_MANTISSA_BITS) - EXP_BIAS;
hi &= HI_MANTISSA_SHIFT_MASK >> shift;
lo = 0;
return retBack();
}
else
{
// the fraction digits are in the lower dword, mask them
unsigned shift = (unsigned)(exp >> HI_MANTISSA_BITS) - EXP_BIAS -
HI_MANTISSA_BITS;
lo &= ~(NEG_LO_MANTISSA_SHIFT_MASK >> shift);
return retBack();
}
else
if( exp < INF_NAN_EXP )
// value is integral
return retBack();
else
{
uint32_t hiMantissa = hi & HI_MANTISSA_MASK;
// infinite, NaN: return value
if( !(hiMantissa | lo) || hiMantissa & NAN_MASK )
return retBack();
// SNaN: raise exception on SNaN if necessary
feraiseexcept( FE_INVALID );
return retBack();
}
else
{
// below +/-1.0
// return +/-0.0
hi &= SIGN_MASK;
lo = 0;
return retBack();
}
}
}

float ftrunc( float f )
{
static_assert(sizeof(float) == 4, "sizeof(float) not equal to
sizeof(uint32_t)");
static_assert(numeric_limits<float>::is_iec559, "float must be IEEE-754");
unsigned const
MANTISSA_BITS = 23,
EXP_BIAS = 0x7F,
INF_NAN_BASE = 0xFF;
uint32_t const
EXP_MASK = (uint32_t)0xFF << MANTISSA_BITS,
SIGN_MASK = (uint32_t)0x100 << MANTISSA_BITS,
MANTISSA_MASK = 0x007FFFFFu,
NAN_MASK = 0x00400000u,
MIN_INTEGRAL_DIGITS_EXP = (uint32_t) EXP_BIAS << MANTISSA_BITS,
MIN_INTEGRAL_ONLY_EXP = (uint32_t)(EXP_BIAS + MANTISSA_BITS) <<
MANTISSA_BITS,
INF_NAN_EXP = (uint32_t)INF_NAN_BASE << MANTISSA_BITS;
int32_t const MANTISSA_SHIFT_MASK = 0xFF800000u;
uint32_t fx;
memcpy( &fx, &f, 4 );
auto retBack = [&]() -> float
{
memcpy( &f, &fx, 4 );
return f;
};
uint32_t exp = fx & EXP_MASK;
if( exp >= MIN_INTEGRAL_DIGITS_EXP )
// value has integral digits
if( exp < MIN_INTEGRAL_ONLY_EXP )
{
// there are fraction-digits to mask out, mask them
unsigned shift = (unsigned)(exp >> MANTISSA_BITS) - EXP_BIAS;
fx &= MANTISSA_SHIFT_MASK >> shift;
return retBack();
}
else
if( exp < INF_NAN_EXP )
// value is integral
return retBack();
else
{
uint32_t mantissa = fx & MANTISSA_MASK;
// infinite, NaN: return value
if( !mantissa || mantissa & NAN_MASK )
return retBack();
// SNaN: raise exception on SNaN if necessary
feraiseexcept( FE_INVALID );
return retBack();
}
else
{
// below +/-1.0
// return +/-0.0
fx &= SIGN_MASK;
return retBack();
}
}

memcpy() is the only valid way to have writeable aliasing in C++.
memcpy() is normally an intrinsic function and in the above code
the memcpy()s between integral and floating point types are just
moves between general purpose and FPU-registers.

Mut...@dastardlyhq.com

unread,

May 29, 2022, 4:21:03 AM5/29/22

to

On Sun, 29 May 2022 10:14:33 +0200
Bonita Montero <Bonita....@gmail.com> wrote:
>The trunc() implementation of VC++'s runtime was too slow for me,
>so I've written my own:

What do you want, a medal? A scooby snack?

Bonita Montero

unread,

May 29, 2022, 4:29:27 AM5/29/22

to

No I don't have your self esteem problems.

Helmut Schellong

unread,

May 29, 2022, 5:57:53 AM5/29/22

to

On 05/29/2022 10:14, Bonita Montero wrote:
> The trunc() implementation of VC++'s runtime was too slow for me,
> so I've written my own:
>

Bonita Montero

unread,

May 29, 2022, 6:45:04 AM5/29/22

to

Am 29.05.2022 um 11:57 schrieb Helmut Schellong:
> On 05/29/2022 10:14, Bonita Montero wrote:
>> The trunc() implementation of VC++'s runtime was too slow for me,
>> so I've written my own:
>>
>> [...]
> Falls Du damit ein Abschneiden eines Dateiinhaltes meinst -
> die Dateigröße kann direkt auf eine bestimmte Größe gesetzt werden.

Hast Du Demenz oder > 2 Promile ?
Das hat damit nix zu tun.
Du siehst paranoiderweise direkt wieder Kritik an deiner Arbeit.

Helmut Waitzmann

unread,

May 29, 2022, 4:46:41 PM5/29/22

to

Helmut Schellong <r...@schellong.biz>:

>On 05/29/2022 10:14, Bonita Montero wrote:
>> The trunc() implementation of VC++'s runtime was too slow for me,
>> so I've written my own:
>>
>> [...]
>Falls Du damit ein Abschneiden eines Dateiinhaltes meinst -
>die Dateigröße kann direkt auf eine bestimmte Größe gesetzt werden.

Es lohnt nicht, auf den Beitrag von Unbekannt, alias Bonita Montero,
alias Malito Molesto, einzugehen. Er enthält keinen Code der
Programmiersprache C und hat auch sonst nichts mit ihr zu tun.
Außerdem ist die Verkehrssprache hier deutsch. Wer auf Englisch
Belange der Programmiersprache C diskutieren will, kann das in den
entsprechenden Gruppen der «comp.lang.all»‐Hierarchie tun.

Kurz: Malitos Beitrag ist hier offtopic.

Helmut Waitzmann

unread,

May 29, 2022, 4:46:42 PM5/29/22

to

Bonita Montero <Bonita....@gmail.com>:

>Am 29.05.2022 um 11:57 schrieb Helmut Schellong:
>> On 05/29/2022 10:14, Bonita Montero wrote:
>>> The trunc() implementation of VC++'s runtime was too slow for me,
>>> so I've written my own:
>>>
>>> [...]
>> Falls Du damit ein Abschneiden eines Dateiinhaltes meinst -
>> die Dateigröße kann direkt auf eine bestimmte Größe gesetzt werden.
>
>Hast Du Demenz oder > 2 Promile ?
>

Malito Molesto, du legst mal wieder ein Verhalten an den Tag, das
unter aller Sau ist.

>Das hat damit nix zu tun.
>

Mit C hat dein Beitrag auch nichts zu tun. Was also willst du hier
überhaupt?

John McCue

unread,

May 29, 2022, 5:33:00 PM5/29/22

to

Trimmed followups to comp.lang.c

Well if I had a scobby snack I would send it to Bonita.

Thanks Bonita, gives me something to play with :)
John

--
[t]csh(1) - "An elegant shell, for a more... civilized age."
- Paraphrasing Star Wars

Christian Gollwitzer

unread,

May 29, 2022, 5:36:16 PM5/29/22

to

Am 29.05.22 um 10:14 schrieb Bonita Montero:

>
> memcpy() is the only valid way to have writeable aliasing in C++.
> memcpy() is normally an intrinsic function and in the above code
> the memcpy()s between integral and floating point types are just
> moves between general purpose and FPU-registers.

How about reinterpret_cast<> ?

Christian

Kenny McCormack

unread,

May 30, 2022, 12:00:43 AM5/30/22

to

In article <t6va0v$eno$1...@dont-email.me>,

Bonita Montero <Bonita....@gmail.com> wrote:
>The trunc() implementation of VC++'s runtime was too slow for me,
>so I've written my own:
>
>#include <cstdint>
>#include <limits>
>#include <cfenv>
>#include <cstring>
>
>using namespace std;

Hmmm... What language is this? What newsgroup is this???

Hmmmm....

--
I love the poorly educated.

Juha Nieminen

unread,

May 30, 2022, 2:22:10 AM5/30/22

to

Doing type punning, ie. interpreting the bit representation of some type
as a value of an incompatible type, by reinterpret-casting a pointer to
that value, or by using a union, is technically speaking UB in C++
(but allowed in C, I think since C11 or the like), even though in
practice it does work (for the same reason it does in C).
The only "official" way of doing it (prior to C++20) was to use
std::memcpy() (and hope that the compiler will optimize it into
equivalent code as using a union).

C++20 added an official way of doing type punning: std::bit_cast.

David Brown

unread,

May 30, 2022, 2:59:09 AM5/30/22

to

On 30/05/2022 08:22, Juha Nieminen wrote:
> In comp.lang.c++ Christian Gollwitzer <auri...@gmx.de> wrote:
>> Am 29.05.22 um 10:14 schrieb Bonita Montero:
>>>
>>> memcpy() is the only valid way to have writeable aliasing in C++.
>>> memcpy() is normally an intrinsic function and in the above code
>>> the memcpy()s between integral and floating point types are just
>>> moves between general purpose and FPU-registers.
>>
>> How about reinterpret_cast<> ?
>
> Doing type punning, ie. interpreting the bit representation of some type
> as a value of an incompatible type, by reinterpret-casting a pointer to
> that value, or by using a union, is technically speaking UB in C++
> (but allowed in C, I think since C11 or the like), even though in
> practice it does work (for the same reason it does in C).

Using unions for type-punning has, AFAIUI, always been defined behaviour
in C. The wording was changed in C99 to make it clear that it is
allowed, but the way the change was done (as a footnote) indicates that
this was meant as a clarification, not a change to the language. In
C++, however, using unions for type-punning has always been UB. (This
can be viewed as an indication that C++ is more strongly typed than C.)

(You are of course correct that reinterpret_cast can't be used for
accessing one type as an incompatible type.)

> The only "official" way of doing it (prior to C++20) was to use
> std::memcpy() (and hope that the compiler will optimize it into
> equivalent code as using a union).

I believe you could also write your own memcpy version - using char or
unsigned char to access the data, or std::byte in C++17. (You might
want that if you have a poorly optimising compiler that uses a library
call for memcpy even in simple cases.)

>
> C++20 added an official way of doing type punning: std::bit_cast.

Yes.

mut...@dastardlyhq.com

unread,

May 30, 2022, 3:52:16 AM5/30/22

to

On Mon, 30 May 2022 06:22:09 -0000 (UTC)
Juha Nieminen <nos...@thanks.invalid> wrote:
>C++20 added an official way of doing type punning: std::bit_cast.

Haven't heard of that. Useful to know. Thanks.

Paavo Helde

unread,

May 30, 2022, 4:58:38 AM5/30/22

to

30.05.2022 09:22 Juha Nieminen kirjutas:

> Doing type punning, ie. interpreting the bit representation of some type
> as a value of an incompatible type, by reinterpret-casting a pointer to
> that value, or by using a union, is technically speaking UB in C++
> (but allowed in C, I think since C11 or the like), even though in
> practice it does work (for the same reason it does in C).

It might work sometimes, until it doesn't. Here is a counter-example. It
uses std::launder() (which ought to be kind of a null-op, not quite sure
why using it makes a difference).

$ cat test1.cpp
#include <iostream>
#include <new>
int main() {
float x = 3.14;
int y = *std::launder(reinterpret_cast<int*>(&x));
std::cout << y << "\n";
}

$ g++ -std=c++20 -O0 -Wall test1.cpp
$ ./a.exe
1078523331

$ g++ -std=c++20 -O3 -Wall test1.cpp
$ ./a.exe
0

You can see with -O3 it has "optimized" the result to be 0. It has all
the rights to do that because this code contains UB-s.

With memcpy it works as intended also with -O3:

$ cat test1.cpp
#include <iostream>
#include <cstring>
int main() {
float x = 3.14;
int y;
std::memcpy(&y, &x, sizeof(int));
std::cout << y << "\n";
}

$ g++ -std=c++20 -O0 -Wall test1.cpp
$ ./a.exe
1078523331
$ g++ -std=c++20 -O3 -Wall test1.cpp
$ ./a.exe
1078523331

$ g++ -v
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-pc-cygwin/10/lto-wrapper.exe
Target: x86_64-pc-cygwin
Configured with:
/mnt/share/cygpkgs/gcc/gcc.x86_64/src/gcc-10.2.0/configure
--srcdir=/mnt/share/cygpkgs/gcc/gcc.x86_64/src/gcc-10.2.0 --prefix=/usr
--exec-prefix=/usr --localstatedir=/var --sysconfdir=/etc
--docdir=/usr/share/doc/gcc --htmldir=/usr/share/doc/gcc/html -C
--build=x86_64-pc-cygwin --host=x86_64-pc-cygwin
--target=x86_64-pc-cygwin --without-libiconv-prefix
--without-libintl-prefix --libexecdir=/usr/lib
--with-gcc-major-version-only --enable-shared --enable-shared-libgcc
--enable-static --enable-version-specific-runtime-libs
--enable-bootstrap --enable-__cxa_atexit --with-dwarf2
--with-tune=generic --enable-languages=c,c++,fortran,lto,objc,obj-c++
--enable-graphite --enable-threads=posix --enable-libatomic
--enable-libgomp --enable-libquadmath --enable-libquadmath-support
--disable-libssp --enable-libada --disable-symvers --with-gnu-ld
--with-gnu-as --with-cloog-include=/usr/include/cloog-isl
--without-libiconv-prefix --without-libintl-prefix --with-system-zlib
--enable-linker-build-id --with-default-libstdcxx-abi=gcc4-compatible
--enable-libstdcxx-filesystem-ts
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 10.2.0 (GCC)

Bonita Montero

unread,

May 30, 2022, 5:32:15 AM5/30/22

to

reinterpret_cast<int_type &>(floatTyped) didn't work wheh I tried
this the last time. But for me this isn't a problem since memcpy
works and is intrinsically optimized with MSVC and clang-cl.

Bonita Montero

unread,

May 30, 2022, 5:34:07 AM5/30/22

to

I had a look at the g++ code. g++ is even faster because it simply
does an integer-store and load in series and checks for the overflow
-flag while storing. If it is sed to choses another, more complex,
path.
Maybe there are some MSVC-intrinsics that allow this conversion from
float to int and to have a char, signalling the overflow-flag.

Bonita Montero

unread,

May 30, 2022, 5:35:00 AM5/30/22

to

Am 30.05.2022 um 06:00 schrieb Kenny McCormack:
> In article <t6va0v$eno$1...@dont-email.me>,
> Bonita Montero <Bonita....@gmail.com> wrote:
>> The trunc() implementation of VC++'s runtime was too slow for me,
>> so I've written my own:
>>
>> #include <cstdint>
>> #include <limits>
>> #include <cfenv>
>> #include <cstring>
>>
>> using namespace std;
>
> Hmmm... What language is this? What newsgroup is this???

It's about the basic algorithm, it woudln't be much different in C.

Stefan Kanthak

unread,

May 30, 2022, 11:54:10 AM5/30/22

to

"Helmut Waitzmann" <nn.th...@xoxy.net> schrieb:

> Helmut Schellong <r...@schellong.biz>:
>>On 05/29/2022 10:14, Bonita Montero wrote:
>>> The trunc() implementation of VC++'s runtime was too slow for me,
>>> so I've written my own:
>>>
>>> [...]
>>Falls Du damit ein Abschneiden eines Dateiinhaltes meinst -
>>die Dateigröße kann direkt auf eine bestimmte Größe gesetzt werden.

AUTSCH! trunc() <> truncate()
Schellong demonstriert 'mal wieder, dass er von C KEINE AHNUNG hat!

> Es lohnt nicht, auf den Beitrag von Unbekannt, alias Bonita Montero,
> alias Malito Molesto, einzugehen. Er enthält keinen Code der
> Programmiersprache C und hat auch sonst nichts mit ihr zu tun.

Abgesehen davon: niemand mit klarem Verstand implementiert eine TRIVIALE
Funktion wie trunc() in C oder gar C++!
Vor allem nicht fuer Windows, von dem 99.99+% aller Installationen auf
Intel-Prozessoren laufen, deren SSE 4.1-Befehlssatz seit 14.5 (in Worten:
VIERZEHNEINHALB) Jahren ROUNDSS/ROUNDSD kennt!

JFTR: fuer 64-bittige Altherthuemer implementiert jeder nicht voellig
Ahnungslose stattdessen folgende Befehlssequenz, gerne auch mit
den allen Compiler-Herstellern von Intel zur Verfuegung gestellten
SSE-Intrinsics:

; argument in xmm0
mov rax, 4330000000000000h
movq xmm2, rax ; xmm2 = 0x1.0p+52 = minimum non-fractional number
mov rax, 3FF0000000000000h
xorpd xmm1, xmm1 ; xmm1 = 0.0
subsd xmm1, xmm0 ; xmm1 = -argument
andpd xmm1, xmm0 ; xmm1 = |argument|
xorpd xmm0, xmm1 ; xmm0 = (argument & -0.0) ? -0.0 : +0.0
movsd xmm3, xmm1 ; xmm3 = |argument|
addsd xmm1, xmm2 ; xmm1 = |argument| + 0x1.0p+52
subsd xmm1, xmm2 ; xmm1 = |argument| - 0x1.0p+52 = rint(|argument|)
movq xmm2, rax ; xmm2 = 0x1.0p+0
cmpsd xmm3, xmm1, 1 ; xmm3 = (|argument| < rint(|argument|)) ? ~0 : 0
andpd xmm3, xmm2 ; xmm3 = (|argument| < rint(|argument|)) ? 1.0 : 0.0
subsd xmm1, xmm3 ; xmm1 = (|argument| < rint(|argument|)) ? -1.0 : 0.0
orpd xmm0, xmm1 ; xmm0 = trunc(argument)

Und fuer 32-bittige C-Programme, die dummerweise noch die FPU missbrauchen,
natuerlich ohne bedingte Spruenge oder den Missbrauch der Ganzzahl-Register:

; argument in st(0)
fld st(0)
fabs
fld st(0)
frndint ; st(0) = rint(argument), st(1) = |argument|, st(2) = argument
fxch st(1)
fucomip st(0), st(1)
fldz
fld1
fcmovnb st(0), st(1) ; st(0) = rint(|argument|) <= |argument| ? 0.0 : 1.0, st(1) = 0.0,
; st(2) = rint(argument), st(3) = argument
fsubp st(2), st(0) ; st(0) = 0.0, st(1) = trunc(|argument|), st(2) = argument
fucomip st(0), st(2)
fst st(1)
fchs ; st(0) = -trunc(|argument|), st(1) = trunc(|argument|)
fcmovbe st(0), st(1)
fstp st(1) ; st(0) = trunc(argument)

>> Außerdem ist die Verkehrssprache hier deutsch. Wer auf Englisch
>> Belange der Programmiersprache C diskutieren will, kann das in den

>> entsprechenden Gruppen der «comp.lang.all»-Hierarchie tun.

>
> Kurz: Malitos Beitrag ist hier offtopic.

Immerhin taugt er hervorragend als SCHLECHTES Beispiel, und das sogar in
mehrfacher Hinsicht.

Stefan
--
<https://www.duden.de/rechtschreibung/Kanthaken>

Helmut Schellong

unread,

May 30, 2022, 1:30:43 PM5/30/22

to

On 05/30/2022 17:52, Stefan Kanthak wrote:
> "Helmut Waitzmann" <nn.th...@xoxy.net> schrieb:
>
>> Helmut Schellong <r...@schellong.biz>:
>>> On 05/29/2022 10:14, Bonita Montero wrote:
>>>> The trunc() implementation of VC++'s runtime was too slow for me,
>>>> so I've written my own:
>>>>
>>>> [...]
>>> Falls Du damit ein Abschneiden eines Dateiinhaltes meinst -
>>> die Dateigröße kann direkt auf eine bestimmte Größe gesetzt werden.
>
> AUTSCH! trunc() <> truncate()
> Schellong demonstriert 'mal wieder, dass er von C KEINE AHNUNG hat!

Du Quatschkopf kannst nicht richtig lesen und nicht richtig denken!
Fühlst Du Dich wohler nach solch unhaltbarem Geschwafel?

Es wurde C++ und von VC++ geschrieben.
Das hat mit C-Libraries nichts zu tun.

In C ist trunc() eine Math-Funktion mit double.
truncate() und ftruncate() ist POSIX und <unistd.h>.
Die und chsize() habe ich seit etwa 1997 implementiert.
Das ist alter Tobak für mich.

Etwa zu der Zeit habe ich math87.a (in ASM) und math87.h entwickelt.
extern double rnd87(double);
extern double up87(double);
extern double dwn87(double);
extern double chop87(double);
Da habe ich alle vier Rundungsfunktionen entwickelt.

[...]

Bonita Montero

unread,

May 30, 2022, 11:40:52 PM5/30/22

to

The code can be much simpler and faster:

#include <cstdint>
#include <limits>
#include <cfenv>
#include <cstring>

#include "fmath.h"

using namespace std;

double ftrunc( double d )
{

unsigned const
MANTISSA_BITS = 52,
EXP_BIAS = 0x3FF,
INF_NAN_BASE = 0x7FF;

constexpr uint64_t
EXP_MASK = (uint64_t)0x7FF << MANTISSA_BITS,

MANTISSA_MASK = 0x000FFFFFFFFFFFFFu,
NAN_MASK = 0x0008000000000000u,

MIN_PURE_INTEGRAL_EXP = (uint64_t)(EXP_BIAS + MANTISSA_BITS) <<

MANTISSA_BITS,
INF_NAN_EXP = (uint64_t)INF_NAN_BASE << MANTISSA_BITS;

uint64_t dx;
memcpy( &dx, &d, 8 );

uint64_t exp = dx & EXP_MASK;

if( exp < MIN_PURE_INTEGRAL_EXP ) [[likely]]
return (double)(int64_t)d;
if( exp < INF_NAN_EXP ) [[likely]]
// value has integral only mantissa bits
return d;

uint64_t mantissa = dx & MANTISSA_MASK;
// infinite, NaN: return value

if( !mantissa || mantissa & NAN_MASK ) [[likely]]
return d;

// SNaN: raise exception on SNaN if necessary
feraiseexcept( FE_INVALID );

return d;

}

float ftrunc( float f )
{
static_assert(sizeof(float) == 4, "sizeof(float) not equal to
sizeof(uint32_t)");
static_assert(numeric_limits<float>::is_iec559, "float must be IEEE-754");

constexpr unsigned

MANTISSA_BITS = 23,
EXP_BIAS = 0x7F,
INF_NAN_BASE = 0xFF;

constexpr uint32_t

EXP_MASK = (uint32_t)0xFF << MANTISSA_BITS,
SIGN_MASK = (uint32_t)0x100 << MANTISSA_BITS,
MANTISSA_MASK = 0x007FFFFFu,
NAN_MASK = 0x00400000u,

MIN_PURE_INTEGRAL_EXP = (uint64_t)(EXP_BIAS + MANTISSA_BITS) <<
MANTISSA_BITS,
INF_NAN_EXP = (uint32_t)INF_NAN_BASE << MANTISSA_BITS;

uint32_t fx;
memcpy( &fx, &f, 4 );

uint32_t exp = fx & EXP_MASK;

if( exp < MIN_PURE_INTEGRAL_EXP )
return (float)(int64_t)f;
if( exp < INF_NAN_EXP )
// value has integral only mantissa bits
return f;

uint32_t mantissa = fx & MANTISSA_MASK;
// infinite, NaN: return value
if( !mantissa || mantissa & NAN_MASK )

return f;

// SNaN: raise exception on SNaN if necessary
feraiseexcept( FE_INVALID );

return f;
}

Bonita Montero

unread,

Jun 1, 2022, 12:07:46 PM6/1/22

to

Unfortunately VC++ calls its own conversion algorithm when converting
from a floating point value to an integer. That makes the before posted
not significantly faster than that of MSVC++ and even slower than that
before.
clang-cl, the mostly MSVC++-compatible version of clang, doesn't do
that but uses the SSE-instructions to convert from floating point to
integer directly and the code runs a lot faster with that. Because
this isn't reliable when you won't switch to clang-cl (the C++20
-featues are partitially a disaster and I've a lot of code that
does compile with current MSVC++ and g++ 11.x) I re-wrote the whole
thing in assembly. Here it is:

PUBLIC ?ftrunc@@YANN@Z

_TEXT SEGMENT
?ftrunc@@YANN@Z PROC
cvttsd2si rax, xmm0
mov rcx, 08000000000000000h
cmp rax, rcx
je complex
cvtsi2sd xmm0, rax
ret
complex:
shr rcx, 52
and rcx, 07FFh
cmp rcx, 07FFh
jne asIs
mov rcx, 0FFFFFFFFFFFFFh
test rax, rcx
jz asIs
mov rcx, 08000000000000h
test rax, rcx
jnz asIs
stmxcsr dword ptr [esp - 4]
or byte ptr [esp - 4], 1
ldmxcsr dword ptr [esp - 4]
ret
asIs:
ret
?ftrunc@@YANN@Z ENDP

?ftrunc@@YAMM@Z PROC
cvttsd2si eax, xmm0
cmp eax, 080000000h
je complex
cvtsi2sd xmm0, eax
ret
complex:
mov ecx, eax
shr ecx, 23
and ecx, 07Fh
cmp ecx, 07Fh
jne asIs
test eax, 07FFFFFh
jz asIs
test eax, 0800000h
jnz asIs
stmxcsr dword ptr [esp - 4]
or byte ptr [esp - 4], 1
ldmxcsr dword ptr [esp - 4]
ret
asIs:
ret
?ftrunc@@YAMM@Z ENDP
_TEXT ENDS

END

You might think why I post assembly code here.
But that's still valid for MSVC C++ programmers only.

Bonita Montero

unread,

Jun 1, 2022, 12:09:30 PM6/1/22

to

> test eax, 0800000h
test eax, 0400000h

Bonita Montero

unread,

Jun 1, 2022, 10:42:53 PM6/1/22

to

There's even a simpler way with SSE 4.1:

inline double ftrunc( double d )
{
__m128d mm;
mm.m128d_f64[0] = d;
_mm_round_sd( mm, mm, _MM_FROUND_FLOOR );
return mm.m128d_f64[0];
}

inline double ftrunc( float f )
{
__m128 mm;
mm.m128_f32[0] = f;
_mm_round_ss( mm, mm, _MM_FROUND_FLOOR );
return mm.m128_f32[0];
}

With that a simple double trunc() 11.6 times faster on my CPU.

jak

unread,

Jun 2, 2022, 1:44:06 AM6/2/22

to

#include <math.h>

double myTrunc(double value)
{
return value - fmod(value, 1.);
}

Bonita Montero

unread,

Jun 2, 2022, 2:13:47 AM6/2/22

to

That solution is 194 times slower with g++ 11.1 and 188 times
slower with clang 13. Seems the compilers don't do any special
optimization if the divisor 1.0.

jak

unread,

Jun 2, 2022, 5:14:29 AM6/2/22

to

compared with your solution or with the trunc function of msvc?
also, I think you will need to verify that the SEE feature exists
and is enabled on the processor before using it.

Bonita Montero

unread,

Jun 2, 2022, 6:48:20 AM6/2/22

to

This featrure came with the first Intel i-series processors.
The generation before were the Core II processors, these are
so ancient and homeopathically used today that I can silently
assume that the code runs.