Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

That's why I love C++17's fold expressions

21 views
Skip to first unread message

Bonita Montero

unread,
Nov 7, 2022, 10:31:22 PM11/7/22
to
/// @brief Decodes a UTF-8 byte sequence and hands every decoded code point to `consumer`.
///
/// @tparam Zero   When false, the input is the iterator range [begin, end).
///                When true, `end` is ignored and decoding stops at the first
///                0 byte found in lead-byte position (NUL-terminated input);
///                the terminator itself is not passed to `consumer`.
/// @tparam Throw  When true, errors are reported by throwing `utf8_exception( err )`.
///                When false, the error code is returned instead.
/// @param begin    First byte of the input.
/// @param end      One past the last byte of the input (unused when `Zero` is true).
/// @param consumer Callable invoked as `consumer( char32_t )` once per decoded code
///                 point, in input order. Code points decoded before an error is
///                 detected have already been delivered.
/// @return `err_t::NO_ERROR` on success. With `Throw == false` also:
///         `err_t::LENGTH_ERROR` if the input ends (or, in `Zero` mode, a 0 byte
///         appears) inside a multi-byte sequence, and `err_t::ENCODING_ERROR` for an
///         invalid lead byte, an invalid continuation byte, or a value above U+10FFFF.
/// @throws utf8_exception carrying the error code, when `Throw == true`.
///
/// NOTE(review): overlong encodings (e.g. 0xC0 0x80) and UTF-16 surrogate code points
/// (U+D800..U+DFFF) are decoded and delivered rather than rejected. RFC 3629 forbids
/// both; confirm whether callers rely on the lenient behaviour before tightening it.
/// NOTE(review): the error tests assume `err_t::NO_ERROR` converts to `false` and every
/// other enumerator converts to `true` - confirm against the `utf8_exception` definition.
template<bool Zero = false, bool Throw = true,
    std::random_access_iterator Iterator, typename Consumer>
    requires std::convertible_to<std::iter_value_t<Iterator>, char8_t> &&
        requires( Consumer consumer ) { { consumer( (char32_t)-1 ) }; }
utf8_exception::err_t parseUtf8( Iterator begin, Iterator end, Consumer consumer )
{
    using err_t = utf8_exception::err_t;
    constexpr char32_t UNICODE_MAX = 0x10FFFF;

    Iterator it = begin;

    // Reads sizeof...(Chars) continuation bytes from `it` into `chars...` and
    // validates that each one has the form 10xxxxxx.
    auto further = [&]<typename ... Chars>( Chars &... chars ) -> err_t
    {
        if constexpr( !Zero )
        {
            // Bounded input: make sure all required bytes exist before touching them.
            if( end - it < static_cast<std::iter_difference_t<Iterator>>( sizeof ...(Chars) ) ) [[unlikely]]
                return err_t::LENGTH_ERROR;
            // Read first, using a comma fold: the comma operator sequences its operands
            // left to right. Folding the reads over '&' would leave the `it++` side
            // effects unsequenced relative to each other.
            (static_cast<void>( chars = *it++ ), ...);
            // Validate all bytes without branching; every operand is side-effect free.
            if( (((chars & 0xC0) == 0x80) & ...) ) [[likely]]
                return err_t::NO_ERROR;
            return err_t::ENCODING_ERROR;
        }
        else
        {
            // NUL-terminated input: '&&' is sequenced and short-circuits, so nothing
            // is read past the first byte that is not a continuation byte (which
            // includes the 0 terminator).
            if( ((((chars = *it++) & 0xC0) == 0x80) && ...) ) [[likely]]
                return err_t::NO_ERROR;
            // Classify the first offending byte. The '||' fold stops at the first
            // non-NO_ERROR result, so bytes skipped by the short-circuit above are
            // never inspected.
            err_t err = err_t::NO_ERROR;
            static_cast<void>( (static_cast<bool>( err = !chars ? err_t::LENGTH_ERROR
                : (chars & 0xC0) != 0x80 ? err_t::ENCODING_ERROR
                : err_t::NO_ERROR ) || ...) );
            return err;
        }
    };

    // Extracts the low `bits` payload bits of a UTF-8 byte.
    auto payload = []( char8_t c, unsigned bits ) -> char32_t
    {
        return c & (0xFF >> (8 - bits));
    };

    // Reports an error according to the `Throw` policy.
    auto ret = []( err_t err ) -> err_t
    {
        if constexpr( !Throw )
            return err;
        else
            throw utf8_exception( err );
    };

    // Test `it`, not `begin`: `begin` is never advanced, so comparing it against
    // `end` would never end the loop for non-empty input. In Zero mode the loop
    // is left via the `break` on the terminating 0 byte.
    while( Zero || it != end )
    {
        char32_t c32;
        const char8_t c0 = *it++;

        if( (c0 & 0x80) == 0 ) [[likely]]
        {
            // Single byte (ASCII).
            if constexpr( Zero )
            {
                if( !c0 ) [[unlikely]]
                    break;
            }
            c32 = c0;
        }
        else if( (c0 & 0xE0) == 0xC0 ) [[likely]]
        {
            // 110xxxxx 10xxxxxx - at most 0x7FF, so no range check is needed.
            char8_t c1;
            if( const err_t err = further( c1 ); static_cast<bool>( err ) ) [[unlikely]]
                return ret( err );
            c32 = (payload( c0, 5 ) << 6) | payload( c1, 6 );
        }
        else if( (c0 & 0xF0) == 0xE0 ) [[likely]]
        {
            // 1110xxxx 10xxxxxx 10xxxxxx - at most 0xFFFF, so no range check is needed.
            char8_t c1, c2;
            if( const err_t err = further( c1, c2 ); static_cast<bool>( err ) ) [[unlikely]]
                return ret( err );
            c32 = (payload( c0, 4 ) << 12) | (payload( c1, 6 ) << 6) | payload( c2, 6 );
        }
        else
        {
            // Anything that is not 11110xxx here is a stray continuation byte or an
            // invalid lead byte (0xF8..0xFF).
            if( (c0 & 0xF8) != 0xF0 ) [[unlikely]]
                return ret( err_t::ENCODING_ERROR );
            char8_t c1, c2, c3;
            if( const err_t err = further( c1, c2, c3 ); static_cast<bool>( err ) ) [[unlikely]]
                return ret( err );
            c32 = (payload( c0, 3 ) << 18) | (payload( c1, 6 ) << 12)
                | (payload( c2, 6 ) << 6) | payload( c3, 6 );
            // Four-byte sequences can encode up to 0x1FFFFF; reject beyond Unicode.
            if( c32 > UNICODE_MAX ) [[unlikely]]
                return ret( err_t::ENCODING_ERROR );
        }

        consumer( c32 );
    }
    return err_t::NO_ERROR;
}
0 new messages