Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Performance of denormal numbers

12 views
Skip to first unread message

Bonita Montero

unread,
Sep 17, 2022, 10:36:43 AM9/17/22
to
I wanted to check whether denormal numbers are still slower than normal
numbers on modern CPUs. Intel introduced the DAZ / FTZ bits with SSE1
because denormals used to be handled in microcode:

#include <atomic>
#include <bit>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <utility>

using namespace std;
using namespace chrono;

// Multiply kernel implemented in assembly; it is exported under the
// MSVC-mangled name ?denScale@@YA_K_K_N@Z by the accompanying MASM listing.
// rounds: number of passes requested; each pass is a chain of 52 mulsd by 0.5.
// den:    true starts every pass from a denormal value, false from 1.0.
// Returns rounds * 52, which the caller uses as the multiplication count
// (0 when rounds is 0).
uint64_t denScale( uint64_t rounds, bool den );

// Times denScale() once starting from a denormal value and once starting
// from a normal value, then prints:
//   1. nanoseconds per multiplication for the denormal run,
//   2. nanoseconds per multiplication for the normal run,
//   3. the relative slowdown of the denormal run in percent, rounded to one
//      decimal place (negative when the denormal run was faster).
int main()
{
    // Runs the multiply kernel and returns the average time per
    // multiplication in nanoseconds.
    auto bench = []( bool den ) -> double
    {
        constexpr std::uint64_t ROUNDS = 25'000'000;
        // steady_clock is monotonic, so a wall-clock adjustment during the
        // run cannot distort (or negate) the measured interval.
        auto const start = std::chrono::steady_clock::now();
        // denScale() reports how many multiplications it accounts for; keep
        // the value unsigned, matching its return type.
        std::uint64_t const nScale = denScale( ROUNDS, den );
        auto const elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now() - start );
        return static_cast<double>( elapsed.count() ) / static_cast<double>( nScale );
    };

    double const tDen = bench( true );
    double const tNorm = bench( false );
    double const rel = tDen / tNorm - 1;

    std::cout << tDen << '\n';
    std::cout << tNorm << '\n';
    // std::round rounds half away from zero for both signs. The previous
    // trunc( x + 0.5 ) only rounded correctly for non-negative x and pushed
    // negative results (denormals faster than normals) toward zero.
    std::cout << std::round( 1000 * rel ) / 10 << "%" << '\n';
}

MASM code:

PUBLIC ?denScale@@YA_K_K_N@Z

CONST SEGMENT
; IEEE-754 double-precision bit patterns used by denScale.
DEN DQ 00008000000000000h ; subnormal: exponent field 0, top mantissa bit set (2^-1023)
ONE DQ 03FF0000000000000h ; 1.0
P5 DQ 03fe0000000000000h ; 0.5, the factor applied by every mulsd
CONST ENDS

_TEXT SEGMENT
; uint64_t denScale( uint64_t rounds, bool den )
;
; Microsoft x64 calling convention: rcx = rounds, dl = den, result in rax.
; Executes exactly `rounds` passes. Each pass reloads the start value (DEN when
; den is non-zero, otherwise ONE) and runs a dependent chain of 52 mulsd by 0.5.
; Starting from DEN, the input of each of the 52 multiplications is subnormal.
; Returns rounds * 52, i.e. the number of multiplications executed
; (0 when rounds is 0).
?denScale@@YA_K_K_N@Z PROC
    xor     rax, rax            ; result for the rounds == 0 early exit
    test    rcx, rcx
    jz      byeBye
    mov     r8, ONE             ; default start value: 1.0
    mov     r9, DEN
    test    dl, dl
    cmovnz  r8, r9              ; den != 0 -> start from the subnormal instead
    movq    xmm1, P5            ; constant multiplier 0.5
    mov     rax, rcx            ; keep the pass count for the return value
loopThis:
    movq    xmm0, r8            ; restart the chain from the chosen start value
    REPT 52
    mulsd   xmm0, xmm1
    ENDM
    sub     rcx, 1
    ; Loop while passes remain. The previous `jae` only left the loop on a
    ; borrow (rcx going from 0 to -1), so it ran rounds + 1 passes while the
    ; return value reported rounds * 52. rcx is non-zero on entry, so `jnz`
    ; yields exactly `rounds` passes.
    jnz     loopThis
    mov     rdx, 52
    mul     rdx                 ; rax = rounds * 52 (rdx is clobbered)
byeBye:
    ret
?denScale@@YA_K_K_N@Z ENDP
_TEXT ENDS
END

On my PC, normal numbers have a 25% higher throughput than denormals.
Feel free to post your results as well.

0 new messages