Electroneum
Language Namespace Reference

Mnemonic language related namespace. More...

Classes

class  Base
 A base language class from which all languages must inherit, enabling polymorphism. More...
 
class  Chinese_Simplified
 
class  Dutch
 
class  English
 
class  EnglishOld
 
class  Esperanto
 
class  French
 
class  German
 
class  Italian
 
class  Japanese
 
class  Lojban
 
class  Portuguese
 
class  Russian
 
class  Singleton
 Singleton helper class. More...
 
class  Spanish
 
struct  WordEqual
 
struct  WordHash
 

Functions

template<typename T >
T utf8prefix (const T &s, size_t count)
 Returns a string made of (at most) the first count characters in s. Assumes s is well-formed UTF-8; no check is made for this. More...
 
template<typename T >
T utf8canonical (const T &s)
 

Detailed Description

Mnemonic language related namespace.

Function Documentation

◆ utf8canonical()

template<typename T >
T Language::utf8canonical ( const T & s)
inline

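Returns a copy of s in which every UTF-8 encoded code point has been passed through std::towlower and re-encoded using the same number of bytes as in the input. Throws std::runtime_error("Invalid UTF-8") if a multi-byte sequence is truncated or a lead byte is not valid.

Definition at line 78 of file language_base.h.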

79  {
80  T sc = "";
81  size_t avail = s.size();
82  const char *ptr = s.data();
83  wint_t cp = 0;
84  int bytes = 1;
85  char wbuf[8], *wptr;
86  while (avail--)
87  {
88  if ((*ptr & 0x80) == 0)
89  {
90  cp = *ptr++;
91  bytes = 1;
92  }
93  else if ((*ptr & 0xe0) == 0xc0)
94  {
95  if (avail < 1)
96  throw std::runtime_error("Invalid UTF-8");
97  cp = (*ptr++ & 0x1f) << 6;
98  cp |= *ptr++ & 0x3f;
99  --avail;
100  bytes = 2;
101  }
102  else if ((*ptr & 0xf0) == 0xe0)
103  {
104  if (avail < 2)
105  throw std::runtime_error("Invalid UTF-8");
106  cp = (*ptr++ & 0xf) << 12;
107  cp |= (*ptr++ & 0x3f) << 6;
108  cp |= *ptr++ & 0x3f;
109  avail -= 2;
110  bytes = 3;
111  }
112  else if ((*ptr & 0xf8) == 0xf0)
113  {
114  if (avail < 3)
115  throw std::runtime_error("Invalid UTF-8");
116  cp = (*ptr++ & 0x7) << 18;
117  cp |= (*ptr++ & 0x3f) << 12;
118  cp |= (*ptr++ & 0x3f) << 6;
119  cp |= *ptr++ & 0x3f;
120  avail -= 3;
121  bytes = 4;
122  }
123  else
124  throw std::runtime_error("Invalid UTF-8");
125 
126  cp = std::towlower(cp);
127  wptr = wbuf;
128  switch (bytes)
129  {
130  case 1: *wptr++ = cp; break;
131  case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break;
132  case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
133  case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
134  default: throw std::runtime_error("Invalid UTF-8");
135  }
136  *wptr = 0;
137  sc += T(wbuf, bytes);
138  cp = 0;
139  bytes = 1;
140  }
141  return sc;
142  }
const uint32_t T[512]
Here is the caller graph for this function:

◆ utf8prefix()

template<typename T >
T Language::utf8prefix ( const T & s,
size_t  count 
)
inline

Returns a string made of (at most) the first count characters in s. Assumes s is well-formed UTF-8; no check is made for this.

Parameters
s: The string from which to return the first count characters.
count: How many characters to return.
Returns
A string consisting of the first count characters in s.

Definition at line 60 of file language_base.h.

61  {
62  T prefix = "";
63  size_t avail = s.size();
64  const char *ptr = s.data();
65  while (count-- && avail--)
66  {
67  prefix += *ptr++;
68  while (avail && ((*ptr) & 0xc0) == 0x80)
69  {
70  prefix += *ptr++;
71  --avail;
72  }
73  }
74  return prefix;
75  }
const uint32_t T[512]
mdb_size_t count(MDB_cursor *cur)
Here is the call graph for this function:
Here is the caller graph for this function: