diff --git a/README.md b/README.md index fb7ac3453..ffed0b258 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,10 @@ building the library or including the function definitions: #define BOOST_JSON_STACK_BUFFER_SIZE 1024 #include ``` +### Endianness + +Boost.JSON uses [Boost.Endian](https://www.boost.org/doc/libs/release/libs/endian/doc/html/endian.html) +in order to support both little-endian and big-endian platforms. ### Supported Compilers diff --git a/doc/qbk/overview.qbk b/doc/qbk/overview.qbk index 9de3cd724..d4f564b8d 100644 --- a/doc/qbk/overview.qbk +++ b/doc/qbk/overview.qbk @@ -120,6 +120,12 @@ building the library or including the function definitions: #include ``` +[heading Endianness] + +Boost.JSON uses +[@https://www.boost.org/doc/libs/release/libs/endian/doc/html/endian.html +Boost.Endian] in order to support both little-endian and big-endian platforms. + [heading Supported Compilers] Boost.JSON has been tested with the following compilers: diff --git a/include/boost/json/detail/sse2.hpp b/include/boost/json/detail/sse2.hpp index 06657bc18..3b979c467 100644 --- a/include/boost/json/detail/sse2.hpp +++ b/include/boost/json/detail/sse2.hpp @@ -136,7 +136,7 @@ count_valid( uint8_t len = first & 0xFF; if(BOOST_JSON_UNLIKELY(end - p < len)) break; - if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) + if(BOOST_JSON_UNLIKELY(! is_valid_utf8_no_inline(p, first))) break; p += len; } @@ -185,7 +185,7 @@ count_valid( uint8_t len = first & 0xFF; if(BOOST_JSON_UNLIKELY(end - p < len)) break; - if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) + if(BOOST_JSON_UNLIKELY(! 
is_valid_utf8_no_inline(p, first))) break; p += len; } diff --git a/include/boost/json/detail/utf8.hpp b/include/boost/json/detail/utf8.hpp index 74434773e..0a0fea2b2 100644 --- a/include/boost/json/detail/utf8.hpp +++ b/include/boost/json/detail/utf8.hpp @@ -25,10 +25,8 @@ template std::uint32_t load_little_endian(void const* p) { - std::uint32_t v = 0; - std::memcpy(&v, p, N); - endian::little_to_native_inplace(v); - return v; + auto const up = reinterpret_cast(p); + return endian::endian_load(up); } inline @@ -70,7 +68,7 @@ inline bool is_valid_utf8(const char* p, uint16_t first) { - uint32_t v; + std::uint32_t v; switch(first >> 8) { default: @@ -81,38 +79,46 @@ is_valid_utf8(const char* p, uint16_t first) v = load_little_endian<2>(p); return (v & 0xC000) == 0x8000; - // 3 bytes, second byte [A0, BF] - case 2: + // 3 bytes, second byte [A0, BF] + case 2: v = load_little_endian<3>(p); return (v & 0xC0E000) == 0x80A000; - // 3 bytes, second byte [80, BF] - case 3: + // 3 bytes, second byte [80, BF] + case 3: v = load_little_endian<3>(p); return (v & 0xC0C000) == 0x808000; - // 3 bytes, second byte [80, 9F] - case 4: + // 3 bytes, second byte [80, 9F] + case 4: v = load_little_endian<3>(p); return (v & 0xC0E000) == 0x808000; - // 4 bytes, second byte [90, BF] - case 5: + // 4 bytes, second byte [90, BF] + case 5: v = load_little_endian<4>(p); return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00; - // 4 bytes, second byte [80, BF] - case 6: + // 4 bytes, second byte [80, BF] + case 6: v = load_little_endian<4>(p); return (v & 0xC0C0C000) == 0x80808000; - // 4 bytes, second byte [80, 8F] - case 7: + // 4 bytes, second byte [80, 8F] + case 7: v = load_little_endian<4>(p); return (v & 0xC0C0F000) == 0x80808000; } } +BOOST_NOINLINE +inline +bool +is_valid_utf8_no_inline(const char* p, uint16_t first) +{ + return is_valid_utf8(p, first); +} + class utf8_sequence { char seq_[4];