summaryrefslogtreecommitdiff
path: root/vendor/jsoncons-0.104.0/jsoncons/detail/unicode_traits.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/jsoncons-0.104.0/jsoncons/detail/unicode_traits.hpp')
-rw-r--r--vendor/jsoncons-0.104.0/jsoncons/detail/unicode_traits.hpp1463
1 files changed, 1463 insertions, 0 deletions
diff --git a/vendor/jsoncons-0.104.0/jsoncons/detail/unicode_traits.hpp b/vendor/jsoncons-0.104.0/jsoncons/detail/unicode_traits.hpp
new file mode 100644
index 00000000..affbb9b7
--- /dev/null
+++ b/vendor/jsoncons-0.104.0/jsoncons/detail/unicode_traits.hpp
@@ -0,0 +1,1463 @@
+// Copyright 2016 Daniel Parker
+// Distributed under the Boost license, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See https://github.com/danielaparker/unicode_traits for latest version
+
+/*
+ * Includes code derived from Unicode, Inc decomposition code in ConvertUTF.h and ConvertUTF.c
+ * http://www.unicode.org/
+ *
+ * "Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard."
+*/
+
+#ifndef UNICONS_UNICODE_TRAITS_HPP
+#define UNICONS_UNICODE_TRAITS_HPP
+
+#if defined (__clang__)
+#if defined(_GLIBCXX_USE_NOEXCEPT)
+#define UNICONS_NOEXCEPT _GLIBCXX_USE_NOEXCEPT
+#else
+#define UNICONS_NOEXCEPT noexcept
+#endif
+#elif defined(__GNUC__)
+#define UNICONS_NOEXCEPT _GLIBCXX_USE_NOEXCEPT
+#elif defined(_MSC_VER)
+#if _MSC_VER >= 1900
+#define UNICONS_NOEXCEPT noexcept
+#else
+#define UNICONS_NOEXCEPT
+#endif
+#else
+#define UNICONS_NOEXCEPT
+#endif
+
+#include <string>
+#include <iterator>
+#include <type_traits>
+#include <system_error>
+
+namespace unicons {
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence. Source: ConvertUTF.c
+ *
+ * The table is indexed by the number of trailing bytes (0..5). The decoders
+ * in this file add the raw bytes of a sequence together, shifting left by
+ * six bits between bytes, and then subtract the entry for that sequence
+ * length; the single subtraction removes the lead-byte marker bits and the
+ * 0x80 marker of every continuation byte (e.g. 0x3080 == (0xC0 << 6) + 0x80).
+ */
+const uint32_t offsets_from_utf8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total. Source: ConvertUTF.c
+ *
+ * The encoders in this file index the table with the total number of bytes
+ * they are about to write (1..4); entry 0 is not used by them.
+ */
+const uint8_t first_byte_mark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The
+ * table is left as-is for anyone who may want to do such conversion, which
+ * was allowed in earlier algorithms. Source: ConvertUTF.c
+ *
+ * Layout: lead bytes 0x00-0xBF map to 0, 0xC0-0xDF to 1, 0xE0-0xEF to 2,
+ * 0xF0-0xF7 to 3, 0xF8-0xFB to 4 and 0xFC-0xFF to 5. The table does not
+ * itself reject continuation bytes (0x80-0xBF) or other illegal lead bytes;
+ * callers in this file follow the lookup with is_legal_utf8().
+ */
+const uint8_t trailing_bytes_for_utf8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+// Some fundamental constants. Source: ConvertUTF.h
+const uint32_t replacement_char = 0x0000FFFD; // U+FFFD; written by the converters in place of a value they cannot represent
+const uint32_t max_bmp = 0x0000FFFF; // largest value that fits in a single UTF-16 code unit
+const uint32_t max_utf16 = 0x0010FFFF; // largest value the UTF-8 -> UTF-16 converter accepts
+const uint32_t max_utf32 = 0x7FFFFFFF; // not referenced in the part of the file visible here
+const uint32_t max_legal_utf32 = 0x0010FFFF; // largest value the UTF-32 paths treat as legal
+
+// Surrogate-pair arithmetic: a supplementary value has half_base subtracted,
+// its upper bits (value >> half_shift) are added to sur_high_start and its
+// lower bits (value & half_mask) are added to sur_low_start.
+const int half_shift = 10; // used for shifting by 10 bits
+const uint32_t half_base = 0x0010000UL;
+const uint32_t half_mask = 0x3FFUL;
+
+// Inclusive bounds of the high (leading) and low (trailing) surrogate ranges.
+const uint16_t sur_high_start = 0xD800;
+const uint16_t sur_high_end = 0xDBFF;
+const uint16_t sur_low_start = 0xDC00;
+const uint16_t sur_low_end = 0xDFFF;
+
+// Returns true when the two most significant bits of ch are 1 and 0, which
+// is the bit pattern of a UTF-8 continuation (trailing) byte.
+inline
+static bool is_continuation_byte(unsigned char ch)
+{
+    const int top_two_bits = ch & 0xC0;
+    return top_two_bits == 0x80;
+}
+
+// Returns true when ch lies in the high (leading) surrogate range
+// [sur_high_start, sur_high_end].
+inline
+bool is_high_surrogate(uint32_t ch) UNICONS_NOEXCEPT
+{
+    const bool outside = (ch < sur_high_start) || (ch > sur_high_end);
+    return !outside;
+}
+
+// Returns true when ch lies in the low (trailing) surrogate range
+// [sur_low_start, sur_low_end].
+inline
+bool is_low_surrogate(uint32_t ch) UNICONS_NOEXCEPT
+{
+    const bool outside = (ch < sur_low_start) || (ch > sur_low_end);
+    return !outside;
+}
+
+// Returns true when ch is any surrogate value, high or low, i.e. lies in
+// [sur_high_start, sur_low_end].
+inline
+bool is_surrogate(uint32_t ch) UNICONS_NOEXCEPT
+{
+    const bool outside = (ch < sur_high_start) || (ch > sur_low_end);
+    return !outside;
+}
+
+// Conversion policy accepted by the convert() overloads and by
+// sequence_generator.
+enum class conv_flags
+{
+ strict = 0, // converters stop and report an error on surrogate values they consider illegal
+ lenient // converters substitute replacement_char or pass such values through instead of stopping
+};
+
+// conv_errc
+//
+// Status codes produced by is_legal_utf8(), convert() and validate().
+// The value-initialised enumerator (conv_errc(), equal to ok) means success;
+// the functions in this file compare against conv_errc() to test for it.
+
+enum class conv_errc
+{
+ ok = 0,
+ over_long_utf8_sequence = 1, // over long utf8 sequence
+ expected_continuation_byte, // expected continuation byte
+ unpaired_high_surrogate, // unpaired high surrogate UTF-16
+ illegal_surrogate_value, // UTF-16 surrogate values are illegal in UTF-32
+ source_exhausted, // partial character in source, but hit end
+ source_illegal // source sequence is illegal/malformed
+};
+
+// std::error_category implementation that names the conv_errc family and
+// translates each conv_errc value into a human-readable message.
+class Unicode_traits_error_category_impl_
+    : public std::error_category
+{
+public:
+    // Name of this category.
+    virtual const char* name() const UNICONS_NOEXCEPT
+    {
+        return "unicons conversion error";
+    }
+
+    // Message for the conv_errc value carried in ev; values without a
+    // dedicated message (including conv_errc::ok) yield an empty string.
+    virtual std::string message(int ev) const
+    {
+        const conv_errc code = static_cast<conv_errc>(ev);
+        if (code == conv_errc::over_long_utf8_sequence)
+        {
+            return "Over long utf8 sequence";
+        }
+        if (code == conv_errc::expected_continuation_byte)
+        {
+            return "Expected continuation byte";
+        }
+        if (code == conv_errc::unpaired_high_surrogate)
+        {
+            return "Unpaired high surrogate UTF-16";
+        }
+        if (code == conv_errc::illegal_surrogate_value)
+        {
+            return "UTF-16 surrogate values are illegal in UTF-32";
+        }
+        if (code == conv_errc::source_exhausted)
+        {
+            return "Partial character in source, but hit end";
+        }
+        if (code == conv_errc::source_illegal)
+        {
+            return "Source sequence is illegal/malformed";
+        }
+        return "";
+    }
+};
+
+// Returns a reference to the single category object used for conv_errc
+// error codes. The object is a function-local static, created on first use.
+inline
+const std::error_category& unicode_traits_error_category()
+{
+    static Unicode_traits_error_category_impl_ conversion_category;
+    const std::error_category& as_base = conversion_category;
+    return as_base;
+}
+
+// Wraps a conv_errc value in a std::error_code bound to the conversion
+// error category.
+inline
+std::error_code make_error_code(conv_errc result)
+{
+    const int raw_value = static_cast<int>(result);
+    return std::error_code(raw_value, unicode_traits_error_category());
+}
+
+// encoding_errc
+//
+// Status codes describing a mismatch between an expected and a detected
+// encoding. The code that produces them is not in the part of the file
+// visible here; the meanings below are taken from the message strings of
+// Encoding_errc_impl_.
+
+enum class encoding_errc
+{
+ ok = 0,
+ expected_u8_found_u16 = 1, // "Expected UTF-8, found UTF-16"
+ expected_u8_found_u32, // "Expected UTF-8, found UTF-32"
+ expected_u16_found_fffe, // "Expected UTF-16, found non character"
+ expected_u32_found_fffe // "Expected UTF-32, found non character"
+};
+
+// std::error_category implementation that names the encoding_errc family
+// and translates each encoding_errc value into a human-readable message.
+class Encoding_errc_impl_
+    : public std::error_category
+{
+public:
+    // Name of this category.
+    virtual const char* name() const UNICONS_NOEXCEPT
+    {
+        return "unicons encoding error";
+    }
+
+    // Message for the encoding_errc value carried in ev; values without a
+    // dedicated message (including encoding_errc::ok) yield an empty string.
+    virtual std::string message(int ev) const
+    {
+        const encoding_errc code = static_cast<encoding_errc>(ev);
+        if (code == encoding_errc::expected_u8_found_u16)
+        {
+            return "Expected UTF-8, found UTF-16";
+        }
+        if (code == encoding_errc::expected_u8_found_u32)
+        {
+            return "Expected UTF-8, found UTF-32";
+        }
+        if (code == encoding_errc::expected_u16_found_fffe)
+        {
+            return "Expected UTF-16, found non character";
+        }
+        if (code == encoding_errc::expected_u32_found_fffe)
+        {
+            return "Expected UTF-32, found non character";
+        }
+        return "";
+    }
+};
+
+// Returns a reference to the single category object used for encoding_errc
+// error codes. The object is a function-local static, created on first use.
+inline
+const std::error_category& encoding_error_category()
+{
+    static Encoding_errc_impl_ encoding_category;
+    const std::error_category& as_base = encoding_category;
+    return as_base;
+}
+
+// Wraps an encoding_errc value in a std::error_code bound to the encoding
+// error category.
+inline
+std::error_code make_error_code(encoding_errc result)
+{
+    const int raw_value = static_cast<int>(result);
+    return std::error_code(raw_value, encoding_error_category());
+}
+}
+
+// Register both unicons enumerations as error-code enums so that they
+// convert implicitly to std::error_code through make_error_code().
+namespace std {
+
+    template<>
+    struct is_error_code_enum<unicons::conv_errc> : public std::true_type {};
+
+    template<>
+    struct is_error_code_enum<unicons::encoding_errc> : public std::true_type {};
+
+}
+
+namespace unicons {
+
+// utf8
+
+// Checks whether the `length` code units starting at `first` form one legal
+// UTF-8 encoded value.
+//
+// first  - iterator to the lead byte; must support `first + n` and `--`.
+// length - number of code units in the candidate sequence; the caller must
+//          guarantee that many units are readable.
+//
+// Returns conv_errc() on success, otherwise:
+//   over_long_utf8_sequence    - length is 0 or greater than 4;
+//   expected_continuation_byte - a trailing byte is not of the form 10xxxxxx;
+//   source_illegal             - the lead byte or the lead/second byte
+//                                combination is outside the accepted ranges.
+template <class Iterator>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<Iterator>::value_type>::value
+                        && sizeof(typename std::iterator_traits<Iterator>::value_type) == sizeof(uint8_t),
+                        conv_errc >::type
+is_legal_utf8(Iterator first, size_t length)
+{
+    if (length < 1 || length > 4)
+    {
+        return conv_errc::over_long_utf8_sequence;
+    }
+
+    // Trailing bytes are examined from the last one back to the second.
+    Iterator cursor = first + length;
+    for (size_t remaining = length; remaining > 1; --remaining)
+    {
+        --cursor;
+        const uint8_t trail = static_cast<uint8_t>(*cursor);
+        if ((trail & 0xC0) != 0x80)
+        {
+            return conv_errc::expected_continuation_byte;
+        }
+    }
+
+    const uint8_t lead = static_cast<uint8_t>(*first);
+
+    if (length >= 2)
+    {
+        // Some lead bytes narrow the range allowed for the second byte.
+        const uint8_t second = static_cast<uint8_t>(*(first + 1));
+        bool second_ok;
+        if (lead == 0xE0)
+        {
+            second_ok = !(second < 0xA0);
+        }
+        else if (lead == 0xED)
+        {
+            second_ok = !(second > 0x9F);
+        }
+        else if (lead == 0xF0)
+        {
+            second_ok = !(second < 0x90);
+        }
+        else if (lead == 0xF4)
+        {
+            second_ok = !(second > 0x8F);
+        }
+        else
+        {
+            second_ok = !(second < 0x80);
+        }
+        if (!second_ok)
+        {
+            return conv_errc::source_illegal;
+        }
+    }
+
+    // 0x80..0xC1 can never start a sequence; neither can anything above 0xF4.
+    if (lead >= 0x80 && lead < 0xC2)
+    {
+        return conv_errc::source_illegal;
+    }
+    if (lead > 0xF4)
+    {
+        return conv_errc::source_illegal;
+    }
+
+    return conv_errc();
+}
+
+// Alias that maps any list of well-formed types to void; used below so that
+// an ill-formed type in the list removes a partial specialization (SFINAE).
+template <class...> using void_t = void;
+
+// Primary template: by default (I, E) is not treated as an output iterator.
+template <class, class, class = void>
+struct is_output_iterator : std::false_type {};
+
+// Selected when std::iterator_traits<I> names an iterator_category and a
+// value of type E can be assigned through a dereferenced I.
+template <class I, class E>
+struct is_output_iterator<I, E, void_t<
+ typename std::iterator_traits<I>::iterator_category,
+ decltype(*std::declval<I>() = std::declval<E>())>> : std::true_type {};
+
+// is_same_size fixes issue with vs2013
+//
+// is_same_size<T1,T2>::value is true when sizeof(T1) == sizeof(T2). The
+// sizeof comparison is only instantiated when neither type is void; for
+// void arguments the primary template yields false.
+
+// primary template
+template<class T1, class T2, class Enable = void>
+struct is_same_size : std::false_type
+{
+};
+
+// specialization for non void types
+template<class T1, class T2>
+struct is_same_size<T1, T2, typename std::enable_if<!std::is_void<T1>::value && !std::is_void<T2>::value>::type>
+{
+ static const bool value = (sizeof(T1) == sizeof(T2));
+};
+
+// is_compatible_output_iterator<OutputIt,CharT>::value is true when OutputIt
+// is an output iterator for CharT whose underlying element type has the same
+// size as CharT. The convert() overloads use it to pick the target encoding
+// from the output iterator type. Primary template: not compatible.
+template<class OutputIt, class CharT, class Enable = void>
+struct is_compatible_output_iterator : std::false_type {};
+
+// Case 1: iterator_traits reports a void value_type and OutputIt exposes a
+// container_type whose value_type is integral and as wide as CharT
+// (presumably insert iterators such as std::back_insert_iterator - confirm).
+template<class OutputIt, class CharT>
+struct is_compatible_output_iterator<OutputIt,CharT,
+ typename std::enable_if<is_output_iterator<OutputIt,CharT>::value
+ && std::is_void<typename std::iterator_traits<OutputIt>::value_type>::value
+ && std::is_integral<typename OutputIt::container_type::value_type>::value
+ && !std::is_void<typename OutputIt::container_type::value_type>::value
+ && is_same_size<typename OutputIt::container_type::value_type,CharT>::value>::type
+> : std::true_type {};
+
+// Case 2: iterator_traits reports an integral value_type as wide as CharT
+// (presumably raw pointers and container iterators - confirm).
+template<class OutputIt, class CharT>
+struct is_compatible_output_iterator<OutputIt,CharT,
+ typename std::enable_if<is_output_iterator<OutputIt,CharT>::value
+ && std::is_integral<typename std::iterator_traits<OutputIt>::value_type>::value
+ && is_same_size<typename std::iterator_traits<OutputIt>::value_type,CharT>::value>::type
+> : std::true_type {};
+
+// Case 3: iterator_traits reports a void value_type and OutputIt exposes a
+// char_type as wide as CharT (presumably stream iterators such as
+// std::ostreambuf_iterator - confirm).
+template<class OutputIt, class CharT>
+struct is_compatible_output_iterator<OutputIt,CharT,
+ typename std::enable_if<is_output_iterator<OutputIt,CharT>::value
+ && std::is_void<typename std::iterator_traits<OutputIt>::value_type>::value
+ && is_same_size<typename OutputIt::char_type,CharT>::value>::type
+> : std::true_type {};
+
+// convert
+
+// Result of convert() and validate(): the position reached in the source
+// range together with the status of the operation.
+template <class Iterator>
+struct convert_result
+{
+ Iterator it; // source position at which processing stopped
+ conv_errc ec; // conv_errc() (ok) on success, otherwise the error reported
+};
+
+// UTF-8 -> UTF-8: validates each sequence in [first,last) and copies it to
+// target unchanged.
+//
+// flags is accepted for symmetry with the other overloads and is ignored.
+// Returns {position, status}. On success position == last; on failure it is
+// the first byte of the sequence that was truncated (source_exhausted) or
+// rejected by is_legal_utf8().
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint8_t)
+                        && is_compatible_output_iterator<OutputIt,uint8_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last, OutputIt target, conv_flags flags=conv_flags::strict)
+{
+    (void)flags;
+
+    conv_errc status = conv_errc();
+    for (; first != last; )
+    {
+        // Sequence length implied by the lead byte.
+        const size_t seq_len = trailing_bytes_for_utf8[static_cast<uint8_t>(*first)] + 1;
+        if (seq_len > (size_t)(last - first))
+        {
+            return convert_result<InputIt>{first, conv_errc::source_exhausted};
+        }
+
+        status = is_legal_utf8(first, seq_len);
+        if (status != conv_errc())
+        {
+            return convert_result<InputIt>{first, status};
+        }
+
+        // A legal sequence has 1..4 bytes; copy them through verbatim.
+        for (size_t copied = 0; copied < seq_len; ++copied)
+        {
+            *target++ = (static_cast<uint8_t>(*first++));
+        }
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// UTF-8 -> UTF-16: decodes each sequence in [first,last) and writes it to
+// target as one UTF-16 unit or as a surrogate pair.
+//
+// Every sequence is checked with is_legal_utf8() regardless of flags. For a
+// decoded surrogate value or a value above max_utf16, strict mode stops with
+// source_illegal and lenient mode writes replacement_char.
+// Returns {position, status}; on failure position is the first byte of the
+// offending sequence.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint8_t)
+                        && is_compatible_output_iterator<OutputIt,uint16_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    conv_errc status = conv_errc();
+    const bool strict = (flags == conv_flags::strict);
+
+    while (first != last)
+    {
+        const unsigned short trailing = trailing_bytes_for_utf8[static_cast<uint8_t>(*first)];
+        if (trailing >= last - first)
+        {
+            status = conv_errc::source_exhausted;
+            break;
+        }
+        /* Do this check whether lenient or strict */
+        status = is_legal_utf8(first, trailing+1);
+        if (status != conv_errc())
+        {
+            break;
+        }
+
+        // Accumulate the raw bytes six bits at a time, then strip the
+        // marker bits with a single subtraction.
+        uint32_t ch = 0;
+        for (unsigned short consumed = 0; consumed <= trailing; ++consumed)
+        {
+            ch <<= 6;
+            ch += static_cast<uint8_t>(*first++);
+        }
+        ch -= offsets_from_utf8[trailing];
+
+        if (ch > max_utf16)
+        {
+            if (strict)
+            {
+                status = conv_errc::source_illegal;
+                first -= (trailing+1); /* return to the start */
+                break; /* Bail out; shouldn't continue */
+            }
+            *target++ = (replacement_char);
+        }
+        else if (ch > max_bmp)
+        {
+            /* supplementary value: emit a surrogate pair */
+            ch -= half_base;
+            *target++ = ((uint16_t)((ch >> half_shift) + sur_high_start));
+            *target++ = ((uint16_t)((ch & half_mask) + sur_low_start));
+        }
+        else if (is_surrogate(ch))
+        {
+            /* surrogate values must not appear as decoded values */
+            if (strict)
+            {
+                first -= (trailing+1); /* return to the illegal value itself */
+                status = conv_errc::source_illegal;
+                break;
+            }
+            *target++ = (replacement_char);
+        }
+        else
+        {
+            *target++ = ((uint16_t)ch); /* normal case */
+        }
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// UTF-8 -> UTF-32: decodes each sequence in [first,last) and writes the
+// resulting value to target.
+//
+// Every sequence is checked with is_legal_utf8() regardless of flags. For a
+// decoded surrogate value, strict mode stops with source_illegal and lenient
+// mode writes replacement_char. A decoded value above max_legal_utf32 is
+// replaced by replacement_char and recorded as source_illegal without
+// stopping the loop.
+// Returns {position, status}; when the loop stops early, position is the
+// first byte of the offending sequence.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint8_t)
+                        && is_compatible_output_iterator<OutputIt,uint32_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    conv_errc status = conv_errc();
+    const bool strict = (flags == conv_flags::strict);
+
+    while (first < last)
+    {
+        const unsigned short trailing = trailing_bytes_for_utf8[static_cast<uint8_t>(*first)];
+        if (trailing >= last - first)
+        {
+            status = conv_errc::source_exhausted;
+            break;
+        }
+        /* Do this check whether lenient or strict */
+        status = is_legal_utf8(first, trailing+1);
+        if (status != conv_errc())
+        {
+            break;
+        }
+
+        // Accumulate the raw bytes six bits at a time, then strip the
+        // marker bits with a single subtraction.
+        uint32_t ch = 0;
+        for (unsigned short consumed = 0; consumed <= trailing; ++consumed)
+        {
+            ch <<= 6;
+            ch += static_cast<uint8_t>(*first++);
+        }
+        ch -= offsets_from_utf8[trailing];
+
+        if (ch > max_legal_utf32)
+        {
+            /* anything over Plane 17 (> 0x10FFFF) is illegal */
+            status = conv_errc::source_illegal;
+            *target++ = (replacement_char);
+            continue;
+        }
+        if (!is_surrogate(ch))
+        {
+            *target++ = (ch);
+            continue;
+        }
+        /* UTF-16 surrogate values are illegal in UTF-32 */
+        if (strict)
+        {
+            first -= (trailing+1); /* return to the illegal value itself */
+            status = conv_errc::source_illegal;
+            break;
+        }
+        *target++ = (replacement_char);
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// utf16
+
+// UTF-16 -> UTF-8: combines surrogate pairs in [first,last) and writes each
+// value to target as 1 to 4 UTF-8 bytes.
+//
+// A high surrogate at the very end of the input stops with source_exhausted
+// in both modes. In strict mode an unpaired high surrogate stops with
+// unpaired_high_surrogate and a lone low surrogate with source_illegal; in
+// lenient mode such units are encoded as they are.
+// Returns {position, status}; on failure position is the offending unit.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint16_t)
+                        && is_compatible_output_iterator<OutputIt,uint8_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    const uint32_t continuation_mask = 0xBF;
+    const uint32_t continuation_mark = 0x80;
+    const bool strict = (flags == conv_flags::strict);
+
+    conv_errc status = conv_errc();
+    while (first < last)
+    {
+        uint32_t ch = *first++;
+
+        if (is_high_surrogate(ch))
+        {
+            if (!(first < last))
+            {
+                /* We don't have the 16 bits following the high surrogate. */
+                --first; /* return to the high surrogate */
+                status = conv_errc::source_exhausted;
+                break;
+            }
+            const uint32_t next_unit = *first;
+            if (is_low_surrogate(next_unit))
+            {
+                /* combine the pair into one value */
+                ch = ((ch - sur_high_start) << half_shift)
+                     + (next_unit - sur_low_start) + half_base;
+                ++first;
+            }
+            else if (strict)
+            {
+                /* it's an unpaired high surrogate */
+                --first; /* return to the illegal value itself */
+                status = conv_errc::unpaired_high_surrogate;
+                break;
+            }
+        }
+        else if (strict && is_low_surrogate(ch))
+        {
+            /* a low surrogate may not lead */
+            --first; /* return to the illegal value itself */
+            status = conv_errc::source_illegal;
+            break;
+        }
+
+        /* Figure out how many bytes the result will require */
+        unsigned short byte_count;
+        if (ch < (uint32_t)0x80)
+        {
+            byte_count = 1;
+        }
+        else if (ch < (uint32_t)0x800)
+        {
+            byte_count = 2;
+        }
+        else if (ch < (uint32_t)0x10000)
+        {
+            byte_count = 3;
+        }
+        else if (ch < (uint32_t)0x110000)
+        {
+            byte_count = 4;
+        }
+        else
+        {
+            byte_count = 3;
+            ch = replacement_char;
+        }
+
+        // Fill the continuation bytes from the last one backwards, six bits
+        // each, then the lead byte with its length marker.
+        uint8_t encoded[4] = { 0, 0, 0, 0 };
+        for (unsigned short pos = byte_count; pos > 1; --pos)
+        {
+            encoded[pos - 1] = (uint8_t)((ch | continuation_mark) & continuation_mask);
+            ch >>= 6;
+        }
+        encoded[0] = (uint8_t)(ch | first_byte_mark[byte_count]);
+
+        for (unsigned short pos = 0; pos < byte_count; ++pos)
+        {
+            *target++ = (encoded[pos]);
+        }
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// UTF-16 -> UTF-16: validates [first,last) and copies the code units to
+// target.
+//
+// A well-formed surrogate pair is copied as two units. A high surrogate at
+// the very end of the input stops with source_exhausted in both modes.
+// In strict mode an unpaired high surrogate stops with
+// unpaired_high_surrogate and a lone low surrogate with source_illegal.
+// In lenient mode both kinds of unpaired surrogate are copied through
+// unchanged. (Previously a lenient-mode unpaired high surrogate was silently
+// dropped from the output, unlike the lone low surrogate and unlike the
+// UTF-16 -> UTF-8 and UTF-16 -> UTF-32 overloads, which keep it.)
+// Returns {position, status}; on failure position is the offending unit.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint16_t)
+                        && is_compatible_output_iterator<OutputIt,uint16_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    conv_errc result = conv_errc();
+
+    while (first != last)
+    {
+        uint32_t ch = *first++;
+        if (is_high_surrogate(ch))
+        {
+            /* If the 16 bits following the high surrogate are in the buffer... */
+            if (first < last) {
+                uint32_t ch2 = *first;
+                if (ch2 >= sur_low_start && ch2 <= sur_low_end) {
+                    /* well-formed pair: copy both units */
+                    *target++ = ((uint16_t)ch);
+                    *target++ = ((uint16_t)ch2);
+                    ++first;
+                } else if (flags == conv_flags::strict) { /* it's an unpaired high surrogate */
+                    --first; /* return to the illegal value itself */
+                    result = conv_errc::unpaired_high_surrogate;
+                    break;
+                } else {
+                    /* lenient: keep the unpaired high surrogate; ch2 is
+                       handled by the next iteration */
+                    *target++ = ((uint16_t)ch);
+                }
+            } else { /* We don't have the 16 bits following the high surrogate. */
+                --first; /* return to the high surrogate */
+                result = conv_errc::source_exhausted;
+                break;
+            }
+        }
+        else if (is_low_surrogate(ch))
+        {
+            // illegal leading low surrogate
+            if (flags == conv_flags::strict) {
+                --first; /* return to the illegal value itself */
+                result = conv_errc::source_illegal;
+                break;
+            }
+            else
+            {
+                *target++ = ((uint16_t)ch);
+            }
+        }
+        else
+        {
+            *target++ = ((uint16_t)ch);
+        }
+    }
+    return convert_result<InputIt>{first,result} ;
+}
+
+// UTF-16 -> UTF-32: combines surrogate pairs in [first,last) and writes one
+// 32-bit value per pair or per single unit to target.
+//
+// A high surrogate at the very end of the input stops with source_exhausted
+// in both modes. In strict mode an unpaired high surrogate or a lone low
+// surrogate stops with source_illegal; in lenient mode such units are
+// written through as they are.
+// Returns {position, status}; on failure position is the offending unit.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint16_t)
+                        && is_compatible_output_iterator<OutputIt,uint32_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    conv_errc status = conv_errc();
+    const bool strict = (flags == conv_flags::strict);
+
+    while (first != last)
+    {
+        uint32_t ch = *first++;
+
+        if (is_high_surrogate(ch))
+        {
+            if (!(first < last))
+            {
+                /* We don't have the 16 bits following the high surrogate. */
+                --first; /* return to the high surrogate */
+                status = conv_errc::source_exhausted;
+                break;
+            }
+            const uint32_t next_unit = *first;
+            if (is_low_surrogate(next_unit))
+            {
+                /* combine the pair into one value */
+                ch = ((ch - sur_high_start) << half_shift)
+                     + (next_unit - sur_low_start) + half_base;
+                ++first;
+            }
+            else if (strict)
+            {
+                /* it's an unpaired high surrogate */
+                --first; /* return to the illegal value itself */
+                status = conv_errc::source_illegal;
+                break;
+            }
+        }
+        else if (strict && is_low_surrogate(ch))
+        {
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            --first; /* return to the illegal value itself */
+            status = conv_errc::source_illegal;
+            break;
+        }
+
+        *target++ = (ch);
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// utf32
+
+// UTF-32 -> UTF-8: writes each value in [first,last) to target as 1 to 4
+// UTF-8 bytes.
+//
+// In strict mode a surrogate value stops with illegal_surrogate_value and
+// position is left on it; in lenient mode it is encoded as it is. A value
+// above max_legal_utf32 is encoded as replacement_char and recorded as
+// source_illegal without stopping the loop, in both modes.
+// Returns {position, status}.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint32_t)
+                        && is_compatible_output_iterator<OutputIt,uint8_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    const uint32_t continuation_mask = 0xBF;
+    const uint32_t continuation_mark = 0x80;
+    const bool strict = (flags == conv_flags::strict);
+
+    conv_errc status = conv_errc();
+    while (first < last)
+    {
+        uint32_t ch = *first++;
+        if (strict && is_surrogate(ch))
+        {
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            --first; /* return to the illegal value itself */
+            status = conv_errc::illegal_surrogate_value;
+            break;
+        }
+
+        /*
+         * Figure out how many bytes the result will require. Turn any
+         * illegally large UTF32 things (> Plane 17) into replacement chars.
+         */
+        unsigned short byte_count;
+        if (ch < (uint32_t)0x80)
+        {
+            byte_count = 1;
+        }
+        else if (ch < (uint32_t)0x800)
+        {
+            byte_count = 2;
+        }
+        else if (ch < (uint32_t)0x10000)
+        {
+            byte_count = 3;
+        }
+        else if (ch <= max_legal_utf32)
+        {
+            byte_count = 4;
+        }
+        else
+        {
+            byte_count = 3;
+            ch = replacement_char;
+            status = conv_errc::source_illegal;
+        }
+
+        // Fill the continuation bytes from the last one backwards, six bits
+        // each, then the lead byte with its length marker.
+        uint8_t encoded[4] = { 0, 0, 0, 0 };
+        for (unsigned short pos = byte_count; pos > 1; --pos)
+        {
+            encoded[pos - 1] = (uint8_t)((ch | continuation_mark) & continuation_mask);
+            ch >>= 6;
+        }
+        encoded[0] = (uint8_t) (ch | first_byte_mark[byte_count]);
+
+        for (unsigned short pos = 0; pos < byte_count; ++pos)
+        {
+            *target++ = (encoded[pos]);
+        }
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// UTF-32 -> UTF-16: writes each value in [first,last) to target as one
+// UTF-16 unit or as a surrogate pair.
+//
+// Strict mode stops with source_illegal at the first surrogate value or
+// value above max_legal_utf32 and leaves position on it; lenient mode
+// writes replacement_char for such values and carries on.
+// (Previously a strict-mode value above max_legal_utf32 set the error but
+// was skipped without stopping, so later input was still converted and the
+// returned position did not identify the offending element. It is now
+// handled exactly like the surrogate case.)
+// Returns {position, status}.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint32_t)
+                        && is_compatible_output_iterator<OutputIt,uint16_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    conv_errc result = conv_errc();
+
+    while (first != last)
+    {
+        uint32_t ch = *first++;
+        if (ch <= max_bmp) { /* Target is a character <= 0xFFFF */
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (is_surrogate(ch) ) {
+                if (flags == conv_flags::strict) {
+                    --first; /* return to the illegal value itself */
+                    result = conv_errc::source_illegal;
+                    break;
+                } else {
+                    *target++ = (replacement_char);
+                }
+            } else {
+                *target++ = ((uint16_t)ch); /* normal case */
+            }
+        } else if (ch > max_legal_utf32) {
+            if (flags == conv_flags::strict) {
+                --first; /* return to the illegal value itself */
+                result = conv_errc::source_illegal;
+                break; /* stop: nothing after an error is converted */
+            } else {
+                *target++ = (replacement_char);
+            }
+        } else {
+            /* target is a character in range 0x10000 - 0x10FFFF: split it
+               into a high and a low surrogate. */
+            ch -= half_base;
+            *target++ = ((uint16_t)((ch >> half_shift) + sur_high_start));
+            *target++ = ((uint16_t)((ch & half_mask) + sur_low_start));
+        }
+    }
+    return convert_result<InputIt>{first,result} ;
+}
+
+// UTF-32 -> UTF-32: copies each value in [first,last) to target.
+//
+// In strict mode a surrogate value stops with illegal_surrogate_value and
+// position is left on it; in lenient mode it is copied as it is. A value
+// above max_legal_utf32 is written as replacement_char and recorded as
+// source_illegal without stopping the loop, in both modes.
+// Returns {position, status}.
+template <class InputIt,class OutputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint32_t)
+                        && is_compatible_output_iterator<OutputIt,uint32_t>::value,
+                        convert_result<InputIt>>::type
+convert(InputIt first, InputIt last,
+        OutputIt target,
+        conv_flags flags = conv_flags::strict)
+{
+    conv_errc status = conv_errc();
+    const bool strict = (flags == conv_flags::strict);
+
+    for (; first != last; )
+    {
+        uint32_t ch = *first++;
+        if (strict && is_surrogate(ch))
+        {
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            --first; /* return to the illegal value itself */
+            status = conv_errc::illegal_surrogate_value;
+            break;
+        }
+        if (ch > max_legal_utf32)
+        {
+            *target++ = (replacement_char);
+            status = conv_errc::source_illegal;
+            continue;
+        }
+        *target++ = (ch);
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// validate
+
+// Validates that [first,last) is well-formed UTF-8 without producing output.
+//
+// Returns {position, status}. On success position == last; on failure it is
+// the first byte of the sequence that was truncated (source_exhausted) or
+// rejected by is_legal_utf8().
+template <class InputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint8_t),
+                        convert_result<InputIt>>::type
+validate(InputIt first, InputIt last) UNICONS_NOEXCEPT
+{
+    conv_errc status = conv_errc();
+    for (; first != last; )
+    {
+        // Sequence length implied by the lead byte.
+        const size_t seq_len = trailing_bytes_for_utf8[static_cast<uint8_t>(*first)] + 1;
+        if (seq_len > (size_t)(last - first))
+        {
+            return convert_result<InputIt>{first, conv_errc::source_exhausted};
+        }
+        status = is_legal_utf8(first, seq_len);
+        if (status != conv_errc())
+        {
+            return convert_result<InputIt>{first, status};
+        }
+        first += seq_len;
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+// utf16
+
+// Validates that [first,last) is well-formed UTF-16 without producing
+// output.
+//
+// Returns {position, status}. On success position == last. Otherwise
+// position is the offending unit and status is source_exhausted (high
+// surrogate at the end of the input), unpaired_high_surrogate (high
+// surrogate not followed by a low one) or source_illegal (low surrogate
+// without a preceding high one).
+template <class InputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint16_t),
+                        convert_result<InputIt>>::type
+validate(InputIt first, InputIt last) UNICONS_NOEXCEPT
+{
+    conv_errc status = conv_errc();
+
+    while (first != last)
+    {
+        const uint32_t unit = *first++;
+
+        if (is_high_surrogate(unit))
+        {
+            if (!(first < last))
+            {
+                /* We don't have the 16 bits following the high surrogate. */
+                --first; /* return to the high surrogate */
+                status = conv_errc::source_exhausted;
+                break;
+            }
+            const uint32_t next_unit = *first;
+            if (!is_low_surrogate(next_unit))
+            {
+                --first; /* return to the illegal value itself */
+                status = conv_errc::unpaired_high_surrogate;
+                break;
+            }
+            ++first; /* step over the low half of the pair */
+        }
+        else if (is_low_surrogate(unit))
+        {
+            /* a low surrogate may not lead */
+            --first; /* return to the illegal value itself */
+            status = conv_errc::source_illegal;
+            break;
+        }
+    }
+    return convert_result<InputIt>{first, status};
+}
+
+
+// utf32
+
+
+// Validates that [first,last) is well-formed UTF-32 without producing
+// output.
+//
+// Returns {position, status}. On success position == last. Otherwise the
+// scan stops at the first offending value, position is left on it, and
+// status is illegal_surrogate_value (surrogate value) or source_illegal
+// (value above max_legal_utf32).
+// (Previously an out-of-range value did not stop the scan: the returned
+// position ran on past it and the error could be overwritten by a later
+// surrogate error.)
+template <class InputIt>
+typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+                        && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint32_t),
+                        convert_result<InputIt>>::type
+validate(InputIt first, InputIt last) UNICONS_NOEXCEPT
+{
+    conv_errc result = conv_errc();
+
+    while (first != last)
+    {
+        uint32_t ch = *first++;
+        /* UTF-16 surrogate values are illegal in UTF-32 */
+        if (is_surrogate(ch)) {
+            --first; /* return to the illegal value itself */
+            result = conv_errc::illegal_surrogate_value;
+            break;
+        }
+        /* anything above 0x10FFFF is not a legal value */
+        if (!(ch <= max_legal_utf32))
+        {
+            --first; /* return to the illegal value itself */
+            result = conv_errc::source_illegal;
+            break;
+        }
+    }
+    return convert_result<InputIt>{first,result} ;
+}
+
+// sequence
+
+// A view of the code units that encode a single value: a start iterator
+// plus a length in code units. The view does not own the underlying data
+// and performs no validation; codepoint() decodes the units it refers to.
+template <class Iterator>
+class sequence
+{
+    Iterator first_;  // first code unit of the sequence
+    size_t length_;   // number of code units in the sequence
+public:
+    sequence(Iterator first, size_t length)
+        : first_(first), length_(length)
+    {
+    }
+
+    // Iterator to the first code unit.
+    Iterator begin() const
+    {
+        return first_;
+    }
+
+    // Number of code units in the sequence.
+    size_t length() const
+    {
+        return length_;
+    }
+
+    // UTF-8: decodes 1 to 4 bytes. Returns replacement_char when the length
+    // is outside 1..4 or when the decoded value is a surrogate or exceeds
+    // max_legal_utf32.
+    template <class CharT = typename std::iterator_traits<Iterator>::value_type>
+    typename std::enable_if<sizeof(CharT) == sizeof(uint8_t),uint32_t>::type
+    codepoint() const UNICONS_NOEXCEPT
+    {
+        if (length_ < 1 || length_ > 4)
+        {
+            return replacement_char;
+        }
+
+        // Accumulate the raw bytes six bits at a time, then strip the
+        // marker bits with a single subtraction.
+        uint32_t value = 0;
+        Iterator cursor = first_;
+        for (size_t consumed = 0; consumed < length_; ++consumed)
+        {
+            value <<= 6;
+            value += static_cast<uint8_t>(*cursor++);
+        }
+        value -= offsets_from_utf8[length_ - 1];
+
+        if (value > max_legal_utf32 || is_surrogate(value))
+        {
+            value = replacement_char;
+        }
+        return value;
+    }
+
+    // UTF-16: combines the two units of a pair when the length is 2,
+    // otherwise returns the first unit as is. Returns replacement_char for
+    // an empty sequence.
+    template <class CharT = typename std::iterator_traits<Iterator>::value_type>
+    typename std::enable_if<sizeof(CharT) == sizeof(uint16_t),uint32_t>::type
+    codepoint() const UNICONS_NOEXCEPT
+    {
+        if (length_ == 0)
+        {
+            return replacement_char;
+        }
+        if (length_ != 2)
+        {
+            return *first_;
+        }
+        const uint32_t high = *first_;
+        const uint32_t low = *(first_+ 1);
+        return ((high - sur_high_start) << half_shift)
+               + (low - sur_low_start) + half_base;
+    }
+
+    // UTF-32: returns the single unit, or replacement_char for an empty
+    // sequence.
+    template <class CharT = typename std::iterator_traits<Iterator>::value_type>
+    typename std::enable_if<sizeof(CharT) == sizeof(uint32_t),uint32_t>::type
+    codepoint() const UNICONS_NOEXCEPT
+    {
+        if (length_ == 0)
+        {
+            return replacement_char;
+        }
+        return *(first_);
+    }
+};
+
+// sequence_generator
+
+// Walks a range of code units one encoded value at a time.
+//
+// Construction positions the generator on the first sequence. While done()
+// is false, get() returns the current sequence and next() advances to the
+// following one. done() becomes true at the end of the input or at the
+// first malformed sequence; status() then tells the two apart.
+//
+// When there is no current sequence (end of input or error) the stored
+// length is 0, so get() yields an empty sequence whose codepoint() is
+// replacement_char. (Previously the length was left stale in those cases,
+// and the UTF-32 next() set it to 1 unconditionally, so get().codepoint()
+// after exhaustion read from the end iterator.)
+//
+// The flags argument is stored but not consulted by the code visible here.
+template <class Iterator>
+class sequence_generator
+{
+    Iterator begin_;    // start of the current sequence
+    Iterator last_;     // end of the input range
+    conv_flags flags_;
+    size_t length_;     // code units in the current sequence, 0 if none
+    conv_errc err_cd_;  // conv_errc() until a malformed sequence is met
+public:
+    typedef sequence<Iterator> sequence_type;
+
+    sequence_generator(Iterator first, Iterator last,
+                       conv_flags flags = conv_flags::strict) UNICONS_NOEXCEPT
+        : begin_(first), last_(last), flags_(flags),
+          length_(0), err_cd_(conv_errc())
+    {
+        next();
+    }
+
+    // True once the input is exhausted or an error has been recorded.
+    bool done() const UNICONS_NOEXCEPT
+    {
+        return err_cd_ != conv_errc() || begin_ == last_;
+    }
+
+    // conv_errc() unless a malformed sequence stopped the generator.
+    conv_errc status() const UNICONS_NOEXCEPT
+    {
+        return err_cd_;
+    }
+
+    // The current sequence; empty when done() is true.
+    sequence_type get() const UNICONS_NOEXCEPT
+    {
+        return sequence<Iterator>(begin_,length_);
+    }
+
+    // UTF-8: step past the current sequence and measure the next one.
+    template <class CharT = typename std::iterator_traits<Iterator>::value_type>
+    typename std::enable_if<sizeof(CharT) == sizeof(uint8_t)>::type
+    next() UNICONS_NOEXCEPT
+    {
+        begin_ += length_;
+        length_ = 0; // no current sequence until one has been validated
+        if (begin_ != last_)
+        {
+            size_t length = trailing_bytes_for_utf8[static_cast<uint8_t>(*begin_)] + 1;
+            if (length > (size_t)(last_ - begin_))
+            {
+                err_cd_ = conv_errc::source_exhausted;
+            }
+            else if ((err_cd_ = is_legal_utf8(begin_, length)) == conv_errc())
+            {
+                length_ = length;
+            }
+        }
+    }
+
+    // UTF-16: step past the current sequence and measure the next one
+    // (one unit, or two for a surrogate pair).
+    template <class CharT = typename std::iterator_traits<Iterator>::value_type>
+    typename std::enable_if<sizeof(CharT) == sizeof(uint16_t)>::type
+    next() UNICONS_NOEXCEPT
+    {
+        begin_ += length_;
+        length_ = 0; // no current sequence until one has been validated
+        if (begin_ != last_)
+        {
+            Iterator it = begin_;
+
+            uint32_t ch = *it++;
+            if (is_high_surrogate(ch))
+            {
+                /* If the 16 bits following the high surrogate are in the buffer... */
+                if (it < last_)
+                {
+                    uint32_t ch2 = *it;
+                    if (ch2 >= sur_low_start && ch2 <= sur_low_end)
+                    {
+                        length_ = 2;
+                    }
+                    else
+                    {
+                        err_cd_ = conv_errc::unpaired_high_surrogate;
+                    }
+                }
+                else
+                {
+                    // We don't have the 16 bits following the high surrogate.
+                    err_cd_ = conv_errc::source_exhausted;
+                }
+            }
+            else if (is_low_surrogate(ch))
+            {
+                /* leading low surrogate */
+                err_cd_ = conv_errc::source_illegal;
+            }
+            else
+            {
+                length_ = 1;
+            }
+        }
+    }
+
+    // UTF-32: every sequence is exactly one unit; no validation is done.
+    template <class CharT = typename std::iterator_traits<Iterator>::value_type>
+    typename std::enable_if<sizeof(CharT) == sizeof(uint32_t)>::type
+    next() UNICONS_NOEXCEPT
+    {
+        begin_ += length_;
+        length_ = (begin_ != last_) ? 1 : 0;
+    }
+};
+
+ // Convenience factory that deduces the iterator type of the generator.
+ template <class Iterator>
+ sequence_generator<Iterator> make_sequence_generator(Iterator first, Iterator last,
+     conv_flags flags = conv_flags::strict)
+ {
+     typedef sequence_generator<Iterator> generator_type;
+     return generator_type(first, last, flags);
+ }
+
+ // Returns the index-th sequence (0-based) of an 8-bit or 16-bit code unit
+ // range. If the input ends or an error is recorded before that sequence is
+ // reached, an empty sequence positioned at last is returned.
+ template <class InputIt>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+     && (sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint8_t) || sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint16_t)),
+     sequence<InputIt>>::type
+ sequence_at(InputIt first, InputIt last, size_t index)
+ {
+     sequence_generator<InputIt> generator(first, last, unicons::conv_flags::strict);
+
+     for (size_t skipped = 0; skipped < index; ++skipped)
+     {
+         if (generator.done())
+         {
+             return sequence<InputIt>(last,0);
+         }
+         generator.next();
+     }
+     if (generator.done())
+     {
+         return sequence<InputIt>(last,0);
+     }
+     return generator.get();
+ }
+
+ // 32-bit overload: each code unit is one sequence, so the index-th sequence
+ // is found by direct offset. An out-of-range index yields an empty sequence
+ // positioned at last.
+ template <class InputIt>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint32_t),
+     sequence<InputIt>>::type
+ sequence_at(InputIt first, InputIt last, size_t index)
+ {
+     const size_t available = std::distance(first,last);
+     if (index >= available)
+     {
+         return sequence<InputIt>(last,0);
+     }
+     return sequence<InputIt>(first+index,1);
+ }
+
+// u8_length
+
+ // 8-bit input: the number of UTF-8 code units is simply the size of the range.
+ template <class InputIt>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint8_t),size_t>::type
+ u8_length(InputIt first, InputIt last) UNICONS_NOEXCEPT
+ {
+     return static_cast<size_t>(std::distance(first,last));
+ }
+
+// utf16
+
+ // 16-bit input: returns the number of UTF-8 code units needed to encode the
+ // UTF-16 range [first, last). Counting is strict: it stops, returning the
+ // count accumulated so far, at the first high surrogate that is not followed
+ // by a low surrogate (including one at the very end of the input) and at the
+ // first low surrogate that does not follow a high surrogate.
+ template <class InputIt>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint16_t),size_t>::type
+ u8_length(InputIt first, InputIt last) UNICONS_NOEXCEPT
+ {
+     size_t count = 0;
+     for (InputIt p = first; p != last; ++p)
+     {
+         uint32_t ch = *p;
+         if (is_high_surrogate(ch))
+         {
+             // Look at the following code unit only if there is one; the
+             // high surrogate may be the last unit of the input.
+             InputIt trail = p;
+             ++trail;
+             if (trail == last)
+             {
+                 break; /* We don't have the 16 bits following the high surrogate. */
+             }
+             const uint32_t ch2 = *trail;
+             if (ch2 < sur_low_start || ch2 > sur_low_end)
+             {
+                 break; /* it's an unpaired high surrogate */
+             }
+             ch = ((ch - sur_high_start) << half_shift)
+                 + (ch2 - sur_low_start) + half_base;
+             p = trail; // the loop increment then steps past the low surrogate
+         }
+         else if (is_low_surrogate(ch))
+         {
+             break; /* leading low surrogate */
+         }
+
+         if (ch < static_cast<uint32_t>(0x80))
+         {
+             ++count;
+         }
+         else if (ch < static_cast<uint32_t>(0x800))
+         {
+             count += 2;
+         }
+         else if (ch < static_cast<uint32_t>(0x10000))
+         {
+             count += 3;
+         }
+         else if (ch < static_cast<uint32_t>(0x110000))
+         {
+             count += 4;
+         }
+         else
+         {
+             count += 3;
+         }
+     }
+     return count;
+ }
+
+
+// utf32
+
+ // 32-bit input: returns the number of UTF-8 code units needed to encode the
+ // UTF-32 range [first, last). Values above max_legal_utf32 are counted as
+ // three units (presumably the size of the replacement character - confirm).
+ template <class InputIt>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint32_t),size_t>::type
+ u8_length(InputIt first, InputIt last) UNICONS_NOEXCEPT
+ {
+     size_t total = 0;
+     for (InputIt cur = first; cur < last; ++cur)
+     {
+         const uint32_t cp = *cur;
+         size_t width = 3; // three-unit range, and the fallback for out-of-range values
+         if (cp < static_cast<uint32_t>(0x80))
+         {
+             width = 1;
+         }
+         else if (cp < static_cast<uint32_t>(0x800))
+         {
+             width = 2;
+         }
+         else if (cp >= static_cast<uint32_t>(0x10000) && cp <= max_legal_utf32)
+         {
+             width = 4;
+         }
+         total += width;
+     }
+     return total;
+ }
+
+// u32_length
+
+ // 8-bit or 16-bit input: counts the sequences the generator yields before it
+ // reports done(), i.e. before the end of input or the first error.
+ template <class InputIt>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value
+     && (sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint8_t) || sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint16_t)),
+     size_t>::type
+ u32_length(InputIt first, InputIt last) UNICONS_NOEXCEPT
+ {
+     size_t total = 0;
+     for (sequence_generator<InputIt> gen(first, last, unicons::conv_flags::strict);
+          !gen.done();
+          gen.next())
+     {
+         ++total;
+     }
+     return total;
+ }
+
+ // 32-bit input: one code unit per code point, so the length is the range size.
+ template <class InputIt>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<InputIt>::value_type>::value && sizeof(typename std::iterator_traits<InputIt>::value_type) == sizeof(uint32_t),
+     size_t>::type
+ u32_length(InputIt first, InputIt last) UNICONS_NOEXCEPT
+ {
+     return static_cast<size_t>(std::distance(first,last));
+ }
+
+ enum class encoding {u8,u16le,u16be,u32le,u32be,undetected}; // encodings reported by detect_encoding(); undetected is returned when nothing is recognised
+
+ template <class Iterator>
+ struct detect_encoding_result // value returned by detect_encoding()
+ {
+ Iterator it; // position in the input reported together with the encoding
+ encoding ec; // the encoding that was recognised, or encoding::undetected
+ };
+
+ // Guesses the encoding of a byte range from its first bytes.
+ //
+ // A byte order mark is looked for first; when one is found the returned
+ // iterator points one past it. Otherwise, with at least four bytes available,
+ // the encoding is inferred from which of the first four bytes are zero, and
+ // the returned iterator is the start of the input. Inputs shorter than four
+ // bytes are only checked for an exact three-byte UTF-8 BOM. When nothing is
+ // recognised the result is {start of input, encoding::undetected}.
+ template <class Iterator>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<Iterator>::value_type>::value && sizeof(typename std::iterator_traits<Iterator>::value_type) == sizeof(uint8_t),
+     detect_encoding_result<Iterator>>::type
+ detect_encoding(Iterator first, Iterator last) UNICONS_NOEXCEPT
+ {
+     Iterator it1 = first;
+     const auto available = std::distance(first,last);
+     if (available < 4)
+     {
+         if (available == 3)
+         {
+             Iterator it2 = ++first;
+             Iterator it3 = ++first;
+             if (static_cast<uint8_t>(*it1) == 0xEF && static_cast<uint8_t>(*it2) == 0xBB && static_cast<uint8_t>(*it3) == 0xBF)
+             {
+                 return detect_encoding_result<Iterator>{last,encoding::u8};
+             }
+         }
+         return detect_encoding_result<Iterator>{it1,encoding::undetected};
+     }
+
+     Iterator it2 = ++first;
+     Iterator it3 = ++first;
+     Iterator it4 = ++first;
+     Iterator past4 = ++first; // one past the fourth byte
+
+     // Widen each byte before shifting so that no shift reaches the sign bit
+     // of a promoted int.
+     const uint32_t b1 = static_cast<uint8_t>(*it1);
+     const uint32_t b2 = static_cast<uint8_t>(*it2);
+     const uint32_t b3 = static_cast<uint8_t>(*it3);
+     const uint32_t b4 = static_cast<uint8_t>(*it4);
+     const uint32_t bom = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
+
+     if (bom == 0xFFFE0000) // bytes 00 00 FE FF
+     {
+         // The BOM is four bytes long, so resume after the fourth byte.
+         return detect_encoding_result<Iterator>{past4,encoding::u32be};
+     }
+     else if (bom == 0x0000FEFF) // bytes FF FE 00 00
+     {
+         return detect_encoding_result<Iterator>{past4,encoding::u32le};
+     }
+     else if ((bom & 0xFFFF) == 0xFFFE) // bytes FE FF
+     {
+         return detect_encoding_result<Iterator>{it3,encoding::u16be};
+     }
+     else if ((bom & 0xFFFF) == 0xFEFF) // bytes FF FE
+     {
+         return detect_encoding_result<Iterator>{it3,encoding::u16le};
+     }
+     else if ((bom & 0xFFFFFF) == 0xBFBBEF) // bytes EF BB BF
+     {
+         return detect_encoding_result<Iterator>{it4,encoding::u8};
+     }
+
+     // No BOM: bit i of the pattern is set when byte i (0-based) is non-zero.
+     const uint32_t pattern = (b1 ? 1 : 0) | (b2 ? 2 : 0) | (b3 ? 4 : 0) | (b4 ? 8 : 0);
+     switch (pattern) {
+     case 0x08: // 00 00 00 xx
+         return detect_encoding_result<Iterator>{it1,encoding::u32be};
+     case 0x0A: // 00 xx 00 xx
+         return detect_encoding_result<Iterator>{it1,encoding::u16be};
+     case 0x01: // xx 00 00 00
+         return detect_encoding_result<Iterator>{it1,encoding::u32le};
+     case 0x05: // xx 00 xx 00
+         return detect_encoding_result<Iterator>{it1,encoding::u16le};
+     case 0x0F: // xx xx xx xx
+         return detect_encoding_result<Iterator>{it1,encoding::u8};
+     default:
+         return detect_encoding_result<Iterator>{it1,encoding::undetected};
+     }
+ }
+
+ template <class Iterator>
+ struct skip_bom_result // value returned by the skip_bom() overloads
+ {
+ Iterator it; // position in the input reported by skip_bom()
+ encoding_errc ec; // encoding_errc() when no encoding mismatch was found
+ };
+
+ // 8-bit input expected to hold UTF-8: runs detect_encoding() and forwards the
+ // iterator it reports. The error code says whether UTF-16 or UTF-32 was
+ // detected instead; UTF-8 and undetected input both report no error.
+ template <class Iterator>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<Iterator>::value_type>::value && sizeof(typename std::iterator_traits<Iterator>::value_type) == sizeof(uint8_t),
+     skip_bom_result<Iterator>>::type
+ skip_bom(Iterator first, Iterator last) UNICONS_NOEXCEPT
+ {
+     auto detected = unicons::detect_encoding(first,last);
+
+     encoding_errc status = encoding_errc();
+     if (detected.ec == unicons::encoding::u16le || detected.ec == unicons::encoding::u16be)
+     {
+         status = encoding_errc::expected_u8_found_u16;
+     }
+     else if (detected.ec == unicons::encoding::u32le || detected.ec == unicons::encoding::u32be)
+     {
+         status = encoding_errc::expected_u8_found_u32;
+     }
+     return skip_bom_result<Iterator>{detected.it,status};
+ }
+
+ // 16-bit input: steps over a leading 0xFEFF code unit. A leading 0xFFFE is
+ // reported as expected_u16_found_fffe with the iterator set to last. Any
+ // other input, including an empty range, is returned untouched.
+ template <class Iterator>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<Iterator>::value_type>::value && sizeof(typename std::iterator_traits<Iterator>::value_type) == sizeof(uint16_t),
+     skip_bom_result<Iterator>>::type
+ skip_bom(Iterator first, Iterator last) UNICONS_NOEXCEPT
+ {
+     if (first != last)
+     {
+         switch (static_cast<uint16_t>(*first))
+         {
+         case 0xFEFF:
+             return skip_bom_result<Iterator>{++first,encoding_errc()};
+         case 0xFFFE:
+             return skip_bom_result<Iterator>{last,encoding_errc::expected_u16_found_fffe};
+         default:
+             break;
+         }
+     }
+     return skip_bom_result<Iterator>{first,encoding_errc()};
+ }
+
+ // 32-bit input: steps over a leading byte order mark.
+ //
+ // The BOM is the code point U+FEFF, which as a 32-bit code unit in the
+ // expected byte order is 0x0000FEFF. The same mark read in the opposite byte
+ // order is 0xFFFE0000; that is reported as expected_u32_found_fffe with the
+ // iterator set to last. Any other input, including an empty range, is
+ // returned untouched.
+ template <class Iterator>
+ typename std::enable_if<std::is_integral<typename std::iterator_traits<Iterator>::value_type>::value && sizeof(typename std::iterator_traits<Iterator>::value_type) == sizeof(uint32_t),
+     skip_bom_result<Iterator>>::type
+ skip_bom(Iterator first, Iterator last) UNICONS_NOEXCEPT
+ {
+     if (first == last)
+     {
+         return skip_bom_result<Iterator>{first,encoding_errc()};
+     }
+     const uint32_t bom = static_cast<uint32_t>(*first);
+     if (bom == 0x0000FEFF)
+     {
+         return skip_bom_result<Iterator>{++first,encoding_errc()};
+     }
+     else if (bom == 0xFFFE0000)
+     {
+         return skip_bom_result<Iterator>{last,encoding_errc::expected_u32_found_fffe};
+     }
+     else
+     {
+         return skip_bom_result<Iterator>{first,encoding_errc()};
+     }
+ }
+
+}
+
+#endif
+