Bonita Montero
unread,Sep 25, 2023, 8:54:28 AM9/25/23You do not have permission to delete messages in this group
Either email addresses are anonymous for this group or you need the view member email addresses permission to view the original message
to
This is my current own from_chars variant.
It is more precise than any from_chars implementation
template<std::integral Char>
conv_result_s<Char> gen_double( std::span<Char> range, double value,
int16_t nFractDigits, bool sign, std::chars_format fmt, bool round = false )
{
static_assert(sizeof(size_t) == 4 || sizeof(size_t) == 8, "must be 64
or 32 bit platform");
using namespace std;
// absolute number of fract-digits,
// negatives are a boundary for signficant bits,
// poisitives may append further zeroes
size_t absFractDigits = nFractDigits >= 0 ? nFractDigits : -nFractDigits;
constexpr uint64_t
IMPLICIT_BIT = 1ull << 52,
MANTISSA_MASK = IMPLICIT_BIT - 1;
// binary representation of value
uint64_t const bin = bit_cast<uint64_t>( value );
// signed binary representation of value
int64_t const sBin = bin;
int exp = bin >> 52 & 0x7FF;
// error-return for values that don't fit into range
auto tooLarge = [&]( auto it ) { return conv_result_s<Char>(
errc::value_too_large, it ); };
// processing Inf / (Q)NaN
auto special = [&]( auto, string_view const &svSpecial ) ->
conv_result_s<Char>
{
auto gen = range.begin();
// range has enough space ?
if( range.size() < sign + svSpecial.length() )
return tooLarge( gen );
// prepend sign
if( sign )
*gen++ = "+-"[sBin < 0];
// append svSpecial and return
return { errc(), copy( svSpecial.begin(), svSpecial.end(), gen ) };
};
// Inf / NaN exponent ?
if( exp == 0x7FF ) [[unlikely]]
if( !(bin & MANTISSA_MASK) )
// Inf
return special( []() {}, "Inf" );
else
{
// SNaN / QNaN
static char const *nanTable[2] = { "SNaN", "QNaN" };
char const *nan = nanTable[(size_t)(bin >> 51 & 1)];
return special( []() {}, { nan, nan + 4 } );
}
// mask physical mantissa bits
uint64_t mantissa = bin & MANTISSA_MASK;
// exponent is != 0: normalized number
if( exp ) [[likely]]
// prepend implicit one
mantissa = IMPLICIT_BIT | mantissa,
// make exponent signed
exp -= 0x3FF;
else
{
if( !mantissa )
{
// +/-0.0
auto gen = range.begin();
// additional digits for exponent
size_t additional = fmt == chars_format::scientific ? 3 : 0;
// enough space for digits ?
if( range.size() >= sign + 2 + absFractDigits + additional )
// return err
return tooLarge( gen );
// prepend sign
if( sign )
*gen++ = "+-"[sBin < 0];
// leading zero, maybe separator
gen = copy_n( "0.", 1 + (bool)absFractDigits, gen );
// fill fract-digits
gen = fill_n( gen, absFractDigits, '0' );
// append exponent, if scientific
gen = copy_n( "e00", additional, gen );
return { errc(), gen };
}
// normalize mantissa
unsigned normShift = countl_zero( mantissa ) - 11;
mantissa <<= normShift;
// correct exponent according to normalization
exp = -0x3FE - normShift;
}
// reserve enough space for maximum number of digits
thread_local vector<ndi<uint8_t>>
prefixDigits( 0x400 ),
suffixDigits( 0x400 );
auto prefDigHead = prefixDigits.end();
bool skipFractZeroes = false;
// has integer-bits ?
if( exp >= 0 )
{
#if defined(_MSC_VER) && defined(_M_X64)
constexpr bool HAS128 = true;
using Word = uint64_t;
using Dword = void;
constexpr unsigned WORD_DIGITS = 19;
#else
constexpr bool HAS128 = false;
constexpr bool BITS64 = sizeof(size_t) == 8;
using Word = conditional_t<sizeof(size_t) == 8, uint32_t, uint16_t>;
using Dword = size_t;
constexpr Word LARGE_DIV = BITS64 ? 1'000'000'000 : 10'000;
constexpr unsigned WORD_DIGITS = BITS64 ? 9 : 4;
#endif
constexpr unsigned WORD_BITS = sizeof(Word) * 8;
size_t smallPrefix;
unsigned tzCnt;
uint64_t prefix;
if( exp < 52 ) [[likely]]
prefix = mantissa >> 52 - exp,
tzCnt = 0;
else
prefix = mantissa,
tzCnt = exp - 52;
// mantissa, including logical suffix, doesn't fit into a Dword ?
if( exp >= sizeof(size_t) * 8 ) [[unlikely]]
{
// prefix is a large prefix, do the large integer math;
// prefix bits, survives call but not thread termination
thread_local vector<ndi<Word>> prefixBits( (0x401 + WORD_BITS - 1) /
WORD_BITS );
// fill leading zero uint32_ts
auto prefixHead = fill_n( prefixBits.begin(), tzCnt / WORD_BITS, 0 );
// begin of sigificant bits
auto prefixSignificant = prefixHead;
// prefix shift inside uint32_t
unsigned shift = tzCnt % WORD_BITS;
// write the physical prefix shifted into the logical prefix
if constexpr( !HAS128 )
do
*prefixHead++ = (Word)(prefix << shift),
prefix >>= WORD_BITS - shift,
shift = 0;
while( prefix );
else
if( shift )
{
*prefixHead++ = (uint64_t)(prefix << shift);
if( uint64_t high = prefix >> WORD_BITS - shift; high )
*prefixHead++ = high;
}
else
*prefixHead++ = prefix;
for( ; ; )
{
// remainder of our division-row
Word remainder = 0;
// start the division from the most significant bits on
auto bitsSweep = prefixHead;
auto divWord = [&]( auto ) -> bool
{
Word div;
#if defined(_MSC_VER) && defined(_M_X64)
div = div1e19( { *--bitsSweep, remainder }, &remainder );
#else
// compute division result and remainder for sub-prefix
Dword counter = (Dword)remainder << WORD_BITS | *--bitsSweep;
div = (Word)(counter / LARGE_DIV);
remainder = (Word)(counter % LARGE_DIV);
#endif
*bitsSweep = div;
return div;
};
// divide the highest prefix half_size_t;
// reduce logical prefix if result was zero
prefixHead -= !divWord( []() {} );
// do the divides for the rest of the logical prefix
while( bitsSweep != prefixSignificant )
divWord( []() {} );
auto unroll = []<size_t ... Indices>( index_sequence<Indices ...>,
auto fn )
{
(fn.template operator ()<Indices>(), ...);
};
if( bitsSweep == prefixBits.begin() ) [[likely]]
// produce HST_DIGITS digits unrolled from the remainder
unroll( make_index_sequence<WORD_DIGITS>(),
[&]<size_t>()
{
*--prefDigHead = (uint8_t)(remainder % 10u);
remainder /= 10u;
} );
else
*--prefixSignificant = remainder,
unroll( make_index_sequence<WORD_DIGITS>(),
[&]<size_t>() { *--prefDigHead = 0; } );
size_t nWords = prefixHead - prefixBits.begin();
#if defined(_MSC_VER) && defined(_M_X64)
if( nWords == 1 ) [[unlikely]]
{
smallPrefix = prefixBits[0];
break;
}
#else
// logical mantissa fits into a Dword now ?
if( prefixHead - prefixBits.begin() <= 2 ) [[likely]]
{
// check if we have two Words (2 * size_t)
assert(prefixHead - prefixBits.begin() == 2);
// assemble small prefix
smallPrefix = (Dword)prefixBits[1] << WORD_BITS | prefixBits[0];
break;
}
#endif
}
}
else
// prefix fits in size_t
smallPrefix = (size_t)(prefix << tzCnt);
// fast path for mantissa values below 0xA00000000u
do
*--prefDigHead = (uint8_t)(smallPrefix % 10u);
while( smallPrefix /= 10u );
}
else
// prefix is 0.
*--prefDigHead = 0,
// skip leading fract zeroes if we're in scientific mode
skipFractZeroes = fmt == chars_format::scientific;
auto suffDigTail = suffixDigits.begin();
// remaining fract digts; prefix might be moved partitially to suffix
size_t
nRemFractDigits = absFractDigits + round,
nPrefixes = prefixDigits.end() - prefDigHead;
if( fmt == chars_format::scientific )
nRemFractDigits -= nRemFractDigits > nPrefixes - 1 ? nPrefixes - 1 : 0;
constexpr unsigned BITS_PER_DIGIT = 0x6A4D3D; // ceil( log( 10 ) / log(
2 ) * 2 ^ 21 )
int decExp = 0;
#if defined(NDEBUG)
constexpr bool FRACT_DEBUG = false;
#else
constexpr bool FRACT_DEBUG = true;
#endif
// compute the number of fract digits which are guaranteed to be zero;
// compute fract digits only if there are more digits than the
guaranteed zeroes
if( unsigned guaranteedZeroes = ((unsigned)(exp < -1 ? -exp - 1 : 0) <<
21) / BITS_PER_DIGIT;
exp < 52 && (FRACT_DEBUG || skipFractZeroes && nRemFractDigits ||
nRemFractDigits > guaranteedZeroes) )
{
unsigned nFractBits = 52 - exp;
uint64_t suffix60;
if( nFractBits <= 60 ) [[likely]]
suffix60 = (mantissa & (1ull << nFractBits) - 1) << 60 - nFractBits;
else
{
#if defined(__SIZEOF_INT128__)
using Word = uint64_t;
using uint128_t = unsigned __int128;
#elif defined(_MSC_VER) && defined(_M_X64) && defined(XXX)
// multiplication encapsulated in mul1e19add
using Word = uint64_t;
using uint128_t = void;
#else
using Word = uint32_t;
using uint128_t = void;
#endif
// store the words as uint64_t, do the math with 128 bits
using Dword = conditional_t<sizeof(Word) == 8, uint128_t, uint64_t>;
// have a thread-local buffer which also might fit for the next time
constexpr unsigned WORD_BITS = sizeof(Word) * 8;
// largest power of 10 that fits into a Word
constexpr Word WORD_MUL = sizeof(Word) == 8 ?
(Word)10'000'000'000'000'000'000u : 1'000'000'000;
// number of digts of a WORD_MUL remainder
constexpr unsigned WORD_DIGITS = sizeof(Word) == 8 ? 19 : 9;
// number of words that fit into an uint64_t
constexpr size_t UI64_SCALE = sizeof(uint64_t) / sizeof(Word);
// thread-local buffer for the logical mantissa to survive each call
thread_local vector<Word> fractBits( (0x3FF + 52 + WORD_BITS - 1) /
WORD_BITS + 2 );
auto
bitsHigh = fractBits.begin(),
bitsLow = fractBits.begin();
// put highest set bit to bit 63
uint64_t fractMantissa = mantissa << 11;
unsigned
// bit position of highest set bit
iHsb = nFractBits - 53,
// right shift of highest set bit in logical mantissa
shift = iHsb % WORD_BITS;
//size_t nWords = (shift + 64 - countr_zero( fractMantissa ) +
WORD_BITS - 1) / WORD_BITS;
if constexpr( sizeof(Word) == 4 )
{
bool skip = true;
// write uint32-part of fractMantissa;;
// may skip the word and also increaste bitsLow if resulting
uint32_t is zero
auto advance = [&]<int Skippable>( integral_constant<int,
Skippable>, unsigned shift )
{
uint32_t word = (uint32_t)(fractMantissa << 32 - shift);
*bitsLow = word;
skip = Skippable && skip && !word;
bitsLow += skip;
++bitsHigh;
fractMantissa >>= shift;
};
advance( integral_constant<int, 2>(), shift );
advance( integral_constant<int, 1>(), 32 );
advance( integral_constant<int, 0>(), 32 );
}
else
{
if( shift )
// we have two words, write the lower word
*bitsHigh = mantissa << 64 - shift,
// increase tail if the lower word was zero
bitsLow += !*bitsHigh++;
// write upper uint64_t
*bitsHigh++ = mantissa >> shift;
}
auto bitsZero = bitsHigh;
// fill leading logical mantissa words with zero
bitsHigh = fill_n( bitsHigh, iHsb / WORD_BITS, 0 );
for( ; ; )
{
Word overflow = 0;
// sweep from the end to produce results and overflows to the left
auto bitsSweep = bitsLow;
auto mulWord = [&]( auto ) -> bool
{
#if defined(_MSC_VER) && defined(_M_X64) && defined(XXX)
uint64_t low = mul1e19add( *bitsSweep, overflow, &overflow );
*bitsSweep++ = low;
return low;
#else
// 64 or 128 bit partitial result
Dword times10 = (Dword)*bitsSweep * WORD_MUL + overflow;
// extract overfow digit
overflow = (Word)(times10 >> WORD_BITS);
// write back lower part of result
*bitsSweep++ = (Word)times10;
return (Word)times10;
#endif
};
bitsLow += !mulWord( []() {} );
while( bitsSweep != bitsZero )
mulWord( []() {} );
// sweep didn't stop at zero word ?
if( bitsSweep == bitsHigh ) [[likely]]
{
// produce all WORD_DIGITS digits at suffDigTail
for( size_t i = WORD_DIGITS; i; )
suffDigTail[--i] = (uint8_t)(overflow % 10u),
overflow /= 10u;
// number of digits to move if necessary
size_t nDigits = WORD_DIGITS;
// index of first non-zero digit if we're skipping leading zeroes
size_t iNz = 0;
// were in scientific mode and we're currently skipping leading
zeros ?
if( skipFractZeroes )
{
// skip leading zeroes of chunk
do
if( suffDigTail[iNz] )
{
skipFractZeroes = false;
break;
}
while( ++iNz != WORD_DIGITS );
// reduce number of digits to move by number of skipped zeroes
nDigits -= iNz;
// adjust decimal exponent according to number of skipped zeroes
decExp -= (int)iNz;
}
size_t nTailSkip = nDigits > nRemFractDigits ? nDigits -
nRemFractDigits : 0;
if( (nDigits -= nTailSkip) == WORD_DIGITS ) [[likely]]
// set new tail beyond the currently produced digits
suffDigTail += WORD_DIGITS,
nRemFractDigits -= WORD_DIGITS;
else
// we either had leading zeroes skipped ot tail-sipping;
// move the digits to suffDigTail
suffDigTail = copy_n( suffDigTail + iNz, nDigits, suffDigTail ),
// reduce numer of remaining fract digits accordingly
nRemFractDigits -= nDigits;
}
else
{
// we've no overflow beyond bitsHigh;
// set the lowest zero-word to overfloe
*bitsZero++ = overflow;
// n Digits to set to zero, bounded by WORD_DIGITS
size_t nDigits = nRemFractDigits > WORD_DIGITS ? WORD_DIGITS :
nRemFractDigits;
if( !skipFractZeroes )
// we're making physical fracts
suffDigTail = fill_n( suffDigTail, nDigits, 0 ),
// reduce numer of remaining fract digits accordingly
nRemFractDigits -= nDigits;
else
// lower decimal exponent by number of skipped digits
decExp -= (int)nDigits;
}
// no more fract digits ?
if( !nRemFractDigits ) [[unlikely]]
{
suffix60 = 0;
break;
}
if( suffDigTail == suffixDigits.end() ) [[unlikely]]
{
suffix60 = 0;
break;
}
size_t nWords = bitsHigh - bitsLow;
// reached one uint64_t of tail bits ?
if( nWords > UI64_SCALE ) [[likely]]
// no: continue large integer processing
continue;
// remaining suffix would fit in 60 bit ?
if( sizeof(Word) == 4 && nWords < 2 || countr_zero(*bitsLow) >= 4)
[[unlikely]]
{
if constexpr( sizeof(Word) == 4 )
// last chunk left two words ?
if( nWords == 2 ) [[likely]]
// make 60 bit suffix from last two words
suffix60 = (uint64_t)bitsLow[1] << 28 | bitsLow[0] >> 4;
else
// last chunk ate also consumed the higher word;
// make remaining 60 bit suffix from the lower word
suffix60 = (uint64_t)bitsLow[0] << 28;
else
// we're in 128 bit mode; get remaining 60 bit suffix from one word
suffix60 = *bitsLow >> 4;
break;
}
}
}
if( suffix60 )
while( suffDigTail != suffixDigits.end() )
{
// procduce the next digit inside the upper four bits
suffix60 *= 10u;
// extract digit
uint8_t digit = suffix60 >> 60;
// skipping zeroes ?
if( digit || !skipFractZeroes ) [[likely]]
// extract an write back digit
*suffDigTail++ = digit,
skipFractZeroes = false;
else
// skip zero, decrement decimal exponent
--decExp;
// mask out produced digit
suffix60 &= (1ull << 60) - 1;
// suffix becomes zero
if( !suffix60 ) [[unlikely]]
break;
// we have remaining digits
if( !--nRemFractDigits ) [[unlikely]]
break;
}
}
auto suffDigHead = suffixDigits.begin();
// preliminary suffix digits begin
if( fmt == chars_format::scientific )
if( nPrefixes = prefixDigits.end() - prefDigHead; nPrefixes > 1 )
{
// shorter make_reverse_iterator
auto reverse = []( auto it ) { return make_reverse_iterator(it); };
// prefix digits to move
size_t nMove = nPrefixes - 1;
// set decimal exponent according to the shifted number of digits
decExp += (int)nMove;
// new end after insertion
auto shiftedEnd = suffDigTail + nMove;
// move existing suffixes reverse
auto rShiftedSuffix = copy( reverse( suffDigTail ), reverse(
suffixDigits.begin() ), reverse( shiftedEnd ) );
// insert prefix before shifted suffix reverse
copy_n( prefixDigits.rbegin(), nMove, rShiftedSuffix );
// set new end after shift and insertion
suffDigTail = shiftedEnd;
// leave only one prefix digit which was the prefix begin before
auto newHead = prefixDigits.end() - 1;
*newHead = *prefDigHead;
prefDigHead = newHead;
}
else if( !*prefDigHead )
{
assert(nPrefixes == 1);
for( ; suffDigHead != suffDigTail && !*suffDigHead; ++suffDigHead );
if( suffDigHead != suffDigTail )
*prefDigHead = *suffDigHead++;
decExp -= (int)(suffDigHead - suffixDigits.begin());
}
if( auto shortenSuffix = [&]( auto ) { for( ; suffDigTail !=
suffDigHead && !suffDigTail[-1]; --suffDigTail ); };
round && (size_t)(suffDigTail - suffixDigits.begin()) > absFractDigits )
{
// adjust suffix tail to the number of fract digits
suffDigTail = suffDigHead + absFractDigits;
// get carry from the first digit beyond the new tail
bool carry = *suffDigTail >= 5;
auto roundDigits = [&]<bool Prefix>( bool_constant<Prefix>, auto tail,
auto begin )
{
// scan digits for nines from the tail to thbe begin
if( Prefix || tail != begin )
do
{
uint8_t digit = tail[-1];
assert(digit <= 9);
carry = digit == 9;
if( !carry ) [[likely]]
// no further carries
return tail;
*--tail = 0;
} while( tail != begin );
// all digits were nine and there's carry
return begin;
};
// if there's a carry apply it until it becomes false
if( carry )
if( (suffDigTail = roundDigits( false_type(), suffDigTail,
suffDigHead )) == suffDigHead
&& roundDigits( true_type(), prefixDigits.end(), prefDigHead ) ==
prefDigHead ) [[unlikely]]
// all head digits were 9 => the value is now 10.0
if( fmt == chars_format::scientific )
// we have only one significant digit in scientific mode;
// make the value 1.0eX
*prefDigHead = 1,
++decExp;
else
// prepend a one to the head
*--prefDigHead = 1;
else;
else
shortenSuffix( []() {} );
}
else
{
size_t nSuffixDigits = suffDigTail - suffDigHead;
suffDigTail = nSuffixDigits <= nFractDigits ? suffDigTail :
suffDigHead + nFractDigits;
shortenSuffix( []() {} );
}
// iterator to the current position in the output range
auto gen = range.begin();
// extens sign flag if value is negative
sign = sign || sBin < 0;
auto hasN = [&]( size_t n ) { return (size_t)(range.end() - gen) >= n; };
// enough space for sign, prefix and optionally comma and suffix
if( !hasN( sign + (prefixDigits.end() - prefDigHead) +
(bool)absFractDigits + absFractDigits ) )
return tooLarge( gen );
// has sign ?
if( sign )
// write sign
*gen++ = "+-"[sBin < 0];
// append prefix-digits
auto prefDigScn = prefDigHead;
do
*gen++ = '0' + *prefDigScn++;
while( prefDigScn != prefixDigits.end() );
// any suffix digits ?
if( absFractDigits && (fmt == chars_format::scientific || suffDigHead
!= suffDigTail) )
{
*gen++ = '.';
// append suffix digits;
// chopped to <= absFractDigits before
for( auto dig = suffDigHead; dig != suffDigTail; ++dig )
*gen++ = '0' + *dig;
// append fill-zeroes only if we don't procuce fract digits until the
last significant
if( nFractDigits > 0 )
// append futher zeroes
gen = fill_n( gen, absFractDigits - (suffDigTail - suffDigHead), 0 );
}
if( fmt == chars_format::scientific )
{
// enough space for "e" and least one exponent digit ?
if( !hasN( 3 ) ) [[unlikely]]
return tooLarge( gen );
*gen++ = 'e';
// append exponent
auto [ec, next] = gen_dec( span( gen, range.end() ), decExp, true );
if( ec != errc() ) [[unlikely]]
return { ec, next };
gen = next;
}
return { errc(), gen };
}