7 #ifndef BOOST_LOCALE_UTF_HPP_INCLUDED 8 #define BOOST_LOCALE_UTF_HPP_INCLUDED 10 #include <boost/locale/config.hpp> 11 #include <boost/cstdint.hpp> 23 # define BOOST_LOCALE_LIKELY(x) __builtin_expect((x),1) 24 # define BOOST_LOCALE_UNLIKELY(x) __builtin_expect((x),0) 26 # define BOOST_LOCALE_LIKELY(x) (x) 27 # define BOOST_LOCALE_UNLIKELY(x) (x) 53 if(0xD800 <=v && v<= 0xDFFF)
58 #ifdef BOOST_LOCALE_DOXYGEN 59 template<
typename CharType,
int size=sizeof(CharType)>
82 template<
typename Iterator>
126 template<
typename Iterator>
133 template<
typename Iterator>
139 template<
typename CharType,
int size=sizeof(CharType)>
142 template<
typename CharType>
149 unsigned char c = ci;
152 if(BOOST_LOCALE_UNLIKELY(c < 194))
158 if(BOOST_LOCALE_LIKELY(c <=244))
170 else if(value <=0x7FF) {
173 else if(BOOST_LOCALE_LIKELY(value <=0xFFFF)) {
184 return (c & 0xC0)==0x80;
192 template<
typename Iterator>
195 if(BOOST_LOCALE_UNLIKELY(p==e))
198 unsigned char lead = *p++;
203 if(BOOST_LOCALE_UNLIKELY(trail_size < 0))
213 code_point c = lead & ((1 << (6-trail_size))-1);
219 if(BOOST_LOCALE_UNLIKELY(p==e))
224 c = (c << 6) | ( tmp & 0x3F);
227 if(BOOST_LOCALE_UNLIKELY(p==e))
232 c = (c << 6) | ( tmp & 0x3F);
235 if(BOOST_LOCALE_UNLIKELY(p==e))
240 c = (c << 6) | ( tmp & 0x3F);
249 if(BOOST_LOCALE_UNLIKELY(
width(c)!=trail_size + 1))
256 template<
typename Iterator>
259 unsigned char lead = *p++;
267 else if(BOOST_LOCALE_LIKELY(lead < 240))
272 code_point c = lead & ((1 << (6-trail_size))-1);
276 c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
279 c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
282 c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
290 template<
typename Iterator>
294 *out++ = static_cast<char_type>(value);
296 else if(value <= 0x7FF) {
297 *out++ = static_cast<char_type>((value >> 6) | 0xC0);
298 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
300 else if(BOOST_LOCALE_LIKELY(value <= 0xFFFF)) {
301 *out++ = static_cast<char_type>((value >> 12) | 0xE0);
302 *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
303 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
306 *out++ = static_cast<char_type>((value >> 18) | 0xF0);
307 *out++ = static_cast<char_type>(((value >> 12) & 0x3F) | 0x80);
308 *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
309 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
315 template<
typename CharType>
316 struct utf_traits<CharType,2> {
320 static bool is_first_surrogate(uint16_t x)
322 return 0xD800 <=x && x<= 0xDBFF;
324 static bool is_second_surrogate(uint16_t x)
326 return 0xDC00 <=x && x<= 0xDFFF;
328 static code_point combine_surrogate(uint16_t w1,uint16_t w2)
330 return ((
code_point(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
334 if(is_first_surrogate(c))
336 if(is_second_surrogate(c))
345 return is_second_surrogate(c);
352 return !is_second_surrogate(c);
355 template<
typename It>
358 if(BOOST_LOCALE_UNLIKELY(current == last))
360 uint16_t w1=*current++;
361 if(BOOST_LOCALE_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
368 uint16_t w2=*current++;
369 if(w2 < 0xDC00 || 0xDFFF < w2)
371 return combine_surrogate(w1,w2);
373 template<
typename It>
376 uint16_t w1=*current++;
377 if(BOOST_LOCALE_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
380 uint16_t w2=*current++;
381 return combine_surrogate(w1,w2);
387 return u>=0x10000 ? 2 : 1;
389 template<
typename It>
392 if(BOOST_LOCALE_LIKELY(u<=0xFFFF)) {
393 *out++ = static_cast<char_type>(u);
397 *out++ = static_cast<char_type>(0xD800 | (u>>10));
398 *out++ = static_cast<char_type>(0xDC00 | (u & 0x3FF));
405 template<
typename CharType>
406 struct utf_traits<CharType,4> {
423 template<
typename It>
429 template<
typename It>
432 if(BOOST_LOCALE_UNLIKELY(current == last))
444 template<
typename It>
447 *out++ = static_cast<char_type>(u);
static code_point decode(Iterator &p, Iterator e)
bool is_valid_codepoint(code_point v)
Checks whether v is a valid code point.
Definition: utf.hpp:49
static Iterator encode(code_point value, Iterator out)
static const code_point incomplete
Special constant that denotes an incomplete code point.
Definition: utf.hpp:44
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:34
static const code_point illegal
Special constant that denotes an illegal code point.
Definition: utf.hpp:39
UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:63
static int width(code_point value)
CharType char_type
Definition: utf.hpp:67
static int trail_length(char_type c)
static bool is_lead(char_type c)
static code_point decode_valid(Iterator &p)
static bool is_trail(char_type c)
static const int max_width
Definition: utf.hpp:92