Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Performance of denormal numbers

8 views
Skip to first unread message

Bonita Montero

unread,
Sep 17, 2022, 10:36:56 AM9/17/22
to
I wanted to check if denormal numbers have slower performance on
modern CPUs. Intel introduced the DAZ / FTZ Bits with SSE1 because
denormals were even handled in microcode:

#include <iostream>
#include <bit>
#include <cstdint>
#include <chrono>
#include <utility>
#include <atomic>

using namespace std;
using namespace chrono;

uint64_t denScale( uint64_t rounds, bool den );

int main()
{
auto bench = []( bool den ) -> double
{
constexpr uint64_t ROUNDS = 25'000'000;
auto start = high_resolution_clock::now();
int64_t nScale = denScale( ROUNDS, den );
return (double)duration_cast<nanoseconds>(
high_resolution_clock::now() - start ).count() / nScale;
};
double
tDen = bench( true ),
tNorm = bench( false ),
rel = tDen / tNorm - 1;
cout << tDen << endl;
cout << tNorm << endl;
cout << trunc( 100 * 10 * rel + 0.5 ) / 10 << "%" << endl;
}

MASM code:

PUBLIC ?denScale@@YA_K_K_N@Z

CONST SEGMENT
DEN DQ 00008000000000000h
ONE DQ 03FF0000000000000h
P5 DQ 03fe0000000000000h
CONST ENDS

_TEXT SEGMENT
?denScale@@YA_K_K_N@Z PROC
xor rax, rax
test rcx, rcx
jz byeBye
mov r8, ONE
mov r9, DEN
test dl, dl
cmovnz r8, r9
movq xmm1, P5
mov rax, rcx
loopThis:
movq xmm0, r8
REPT 52
mulsd xmm0, xmm1
ENDM
sub rcx, 1
jae loopThis
mov rdx, 52
mul rdx
byeBye:
ret
?denScale@@YA_K_K_N@Z ENDP
_TEXT ENDS
END

For my PC normal numbers have a 25% higher throughput.
Feel free to post your results also.

0 new messages