Boost.Locale
utf8_codecvt.hpp
1 //
2 // Copyright (c) 2015 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0.
5 // https://www.boost.org/LICENSE_1_0.txt
6 
7 #ifndef BOOST_LOCALE_UTF8_CODECVT_HPP
8 #define BOOST_LOCALE_UTF8_CODECVT_HPP
9 
10 #include <boost/locale/utf.hpp>
11 #include <boost/locale/generic_codecvt.hpp>
12 #include <boost/cstdint.hpp>
13 #include <locale>
14 
15 namespace boost {
16 namespace locale {
17 
21 template<typename CharType>
22 class utf8_codecvt : public generic_codecvt<CharType,utf8_codecvt<CharType> >
23 {
24 public:
25 
26  struct state_type {};
27 
28  utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType,utf8_codecvt<CharType> >(refs)
29  {
30  }
31 
32  static int max_encoding_length()
33  {
34  return 4;
35  }
36 
37  static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
38  {
39  return state_type();
40  }
41  static utf::code_point to_unicode(state_type &,char const *&begin,char const *end)
42  {
43  char const *p=begin;
44 
46  if(c!=utf::illegal && c!=utf::incomplete)
47  begin = p;
48  return c;
49  }
50 
51  static utf::code_point from_unicode(state_type &,utf::code_point u,char *begin,char const *end)
52  {
54  return utf::illegal;
55  int width;
56  if((width=utf::utf_traits<char>::width(u)) > end - begin)
57  return utf::incomplete;
59  return width;
60  }
61 };
62 
63 } // locale
64 } // namespace boost
65 
66 #endif
67 
static code_point decode(Iterator &p, Iterator e)
bool is_valid_codepoint(code_point v)
Checks whether v is a valid Unicode code point.
Definition: utf.hpp:49
static Iterator encode(code_point value, Iterator out)
static const code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:44
Generic UTF-8 codecvt facet; it allows converting UTF-8 strings to UTF-16 and UTF-32 using wchar_t,...
Definition: utf8_codecvt.hpp:22
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:34
initial_convertion_state
Definition: generic_codecvt.hpp:38
static const code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:39
static int width(code_point value)
Definition: utf8_codecvt.hpp:26
Generic codecvt facet for converting various stateless encodings to UTF-16 and UTF-32 using wchar_t,...
Definition: generic_codecvt.hpp:139