18#ifndef NUSPELL_UNICODE_HXX
19#define NUSPELL_UNICODE_HXX
22#include <unicode/utf16.h>
23#include <unicode/utf8.h>
30inline constexpr auto u8_max_cp_length = U8_MAX_LENGTH;
/**
 * @brief Tells whether a code point produced by the checking UTF-8 decoders
 *        signals an error.
 *
 * ICU's safe UTF-8 macros (U8_NEXT, U8_PREV) report an ill-formed sequence by
 * storing a negative value in the output code point.
 *
 * @param cp value written by a decoding call
 * @return true if @p cp is negative, i.e. decoding failed
 */
auto inline u8_is_cp_error(int32_t cp) -> bool { return cp < 0; }
// Decodes the code point of the UTF-8 range `str` that starts at index `i`
// using ICU's U8_NEXT, stores it in `cp` and moves `i` past it.
// Per the ICU documentation U8_NEXT sets `cp` to a negative value when the
// sequence is ill-formed; u8_is_cp_error() tests for that.
// NOTE(review): this listing is incomplete - the template header, the braces,
// the declaration of `idx` and the #else/#endif of the version check are not
// visible, and each line carries a fused line number. Confirm against the
// real header.
35auto u8_advance_cp(
const Range& str,
size_t& i, int32_t& cp) ->
void
37 using std::size, std::data;
// ICU 60 and older: the macro is given a raw pointer and int32_t index and
// length instead of the range and size_t index (presumably because the older
// macro cannot cope with those types - TODO confirm).
38#if U_ICU_VERSION_MAJOR_NUM <= 60
39 auto s_ptr = data(str);
41 int32_t len = size(str);
42 U8_NEXT(s_ptr, idx, len, cp);
// Newer ICU (presumably the #else branch): the range and the size_t index are
// passed to the macro directly.
46 U8_NEXT(str, i, len, cp);
// Moves `i` from one code point boundary of the UTF-8 range `str` to the next
// using ICU's bounds-checked U8_FWD_1; no code point is returned.
// NOTE(review): the template header, the braces and the declaration of `len`
// (presumably the size of `str`) are not visible in this listing.
51auto u8_advance_index(
const Range& str,
size_t& i) ->
void
55 U8_FWD_1(str, i, len);
// Steps backwards over one code point of the UTF-8 range `str` with ICU's
// U8_PREV (lower bound 0) and stores the code point that was stepped over in
// `cp`. Per the ICU documentation `cp` is negative for an ill-formed sequence.
// NOTE(review): the declarations of `ptr` and `idx`, the write-back into `i`
// and the braces are not visible in this listing; presumably `ptr` is
// data(str) and `idx` is an int32_t copy of `i` - confirm.
59auto u8_reverse_cp(
const Range& str,
size_t& i, int32_t& cp) ->
void
61 using std::size, std::data;
64 U8_PREV(ptr, 0, idx, cp);
// Moves an index back by one code point in the UTF-8 range `str` using ICU's
// bounds-checked U8_BACK_1 with lower bound 0; no code point is returned.
// NOTE(review): the declarations of `ptr` and `idx`, the write-back into `i`
// and the braces are not visible in this listing - confirm against the real
// header.
69auto u8_reverse_index(
const Range& str,
size_t& i) ->
void
71 using std::size, std::data;
74 U8_BACK_1(ptr, 0, idx);
// Encodes `cp` as UTF-8 into `buf` at index `i` with ICU's U8_APPEND and
// advances the index past the written bytes. Per the ICU documentation
// U8_APPEND sets `error` when `cp` is not a valid code point or the buffer
// capacity (`len`) is too small.
// NOTE(review): the return type, the braces, the declaration of `idx` and the
// #else/#endif of the version check are not visible in this listing.
79auto u8_write_cp_and_advance(Range& buf,
size_t& i, int32_t cp,
bool& error)
82 using std::size, std::data;
// ICU 60 and older: an int32_t length and the separate index `idx` are used
// (presumably copied back into `i` afterwards - TODO confirm).
83#if U_ICU_VERSION_MAJOR_NUM <= 60
86 int32_t len = size(buf);
87 U8_APPEND(buf, idx, len, cp, error);
// Newer ICU (presumably the #else branch): `i` is passed to the macro directly.
91 U8_APPEND(buf, i, len, cp, error);
/**
 * @brief Decodes the code point starting at index @p i and advances @p i.
 *
 * Thin wrapper over ICU's U8_NEXT_UNSAFE, which performs neither bounds nor
 * well-formedness checks: @p str must hold valid UTF-8 and @p i must be a
 * code point boundary before the end of the range.
 *
 * @param str range of UTF-8 code units
 * @param[in,out] i index of a code point boundary; moved to the next one
 * @param[out] cp receives the decoded code point
 */
template <class Range>
auto valid_u8_advance_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U8_NEXT_UNSAFE(str, i, cp);
}
/**
 * @brief Advances @p i past the code point that starts at index @p i.
 *
 * Thin wrapper over ICU's U8_FWD_1_UNSAFE, which performs neither bounds nor
 * well-formedness checks: @p str must hold valid UTF-8 and @p i must be a
 * code point boundary before the end of the range.
 *
 * @param str range of UTF-8 code units
 * @param[in,out] i index of a code point boundary; moved to the next one
 */
template <class Range>
auto valid_u8_advance_index(const Range& str, size_t& i) -> void
{
	U8_FWD_1_UNSAFE(str, i);
}
/**
 * @brief Moves @p i back by one code point and decodes that code point.
 *
 * Thin wrapper over ICU's U8_PREV_UNSAFE, which performs neither bounds nor
 * well-formedness checks: @p str must hold valid UTF-8 and @p i must be a
 * code point boundary greater than zero.
 *
 * @param str range of UTF-8 code units
 * @param[in,out] i index of a code point boundary; moved to the previous one
 * @param[out] cp receives the code point that was stepped over
 */
template <class Range>
auto valid_u8_reverse_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U8_PREV_UNSAFE(str, i, cp);
}
/**
 * @brief Moves @p i back to the start of the preceding code point.
 *
 * Thin wrapper over ICU's U8_BACK_1_UNSAFE, which performs neither bounds nor
 * well-formedness checks: @p str must hold valid UTF-8 and @p i must be a
 * code point boundary greater than zero.
 *
 * @param str range of UTF-8 code units
 * @param[in,out] i index of a code point boundary; moved to the previous one
 */
template <class Range>
auto valid_u8_reverse_index(const Range& str, size_t& i) -> void
{
	U8_BACK_1_UNSAFE(str, i);
}
/**
 * @brief Encodes @p cp as UTF-8 into @p buf at index @p i and advances @p i.
 *
 * Thin wrapper over ICU's U8_APPEND_UNSAFE, which checks neither the code
 * point nor the remaining capacity: @p cp must be a valid code point and
 * @p buf must have room for its encoding (at most u8_max_cp_length bytes).
 *
 * @param buf writable range of UTF-8 code units
 * @param[in,out] i write position; moved past the bytes written
 * @param cp code point to encode
 */
template <class Range>
auto valid_u8_write_cp_and_advance(Range& buf, size_t& i, char32_t cp) -> void
{
	U8_APPEND_UNSAFE(buf, i, cp);
}
129inline constexpr auto u16_max_cp_length = U16_MAX_LENGTH;
131auto inline u16_is_cp_error(int32_t cp) ->
bool {
return U_IS_SURROGATE(cp); }
/**
 * @brief Decodes the code point starting at index @p i and advances @p i.
 *
 * Uses ICU's bounds-checked U16_NEXT. An unpaired surrogate is returned
 * as-is in @p cp; test the result with u16_is_cp_error().
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the next one
 * @param[out] cp receives the decoded code point
 */
template <class Range>
auto u16_advance_cp(const Range& str, size_t& i, int32_t& cp) -> void
{
	using std::size;
	auto len = size(str);
	U16_NEXT(str, i, len, cp);
}
/**
 * @brief Advances @p i past the code point that starts at index @p i.
 *
 * Uses ICU's bounds-checked U16_FWD_1, so @p i never moves beyond the end of
 * @p str.
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the next one
 */
template <class Range>
auto u16_advance_index(const Range& str, size_t& i) -> void
{
	using std::size;
	auto len = size(str);
	U16_FWD_1(str, i, len);
}
/**
 * @brief Moves @p i back by one code point and decodes that code point.
 *
 * Uses ICU's U16_PREV with lower bound 0. An unpaired surrogate is returned
 * as-is in @p cp; test the result with u16_is_cp_error().
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the previous one
 * @param[out] cp receives the code point that was stepped over
 */
template <class Range>
auto u16_reverse_cp(const Range& str, size_t& i, int32_t& cp) -> void
{
	U16_PREV(str, 0, i, cp);
}
/**
 * @brief Moves @p i back to the start of the preceding code point.
 *
 * Uses ICU's U16_BACK_1 with lower bound 0.
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the previous one
 */
template <class Range>
auto u16_reverse_index(const Range& str, size_t& i) -> void
{
	U16_BACK_1(str, 0, i);
}
/**
 * @brief Encodes @p cp as UTF-16 into @p buf at index @p i and advances @p i.
 *
 * Uses ICU's checked U16_APPEND with the size of @p buf as capacity. Per the
 * ICU documentation @p error is set when @p cp is not a valid code point or
 * there is not enough room left in @p buf.
 *
 * @param buf writable range of UTF-16 code units
 * @param[in,out] i write position; moved past the units written
 * @param cp code point to encode
 * @param[out] error set by ICU when the append fails
 */
template <class Range>
auto u16_write_cp_and_advance(Range& buf, size_t& i, int32_t cp, bool& error)
    -> void
{
	using std::size;
	auto len = size(buf);
	U16_APPEND(buf, i, len, cp, error);
}
/**
 * @brief Decodes the code point starting at index @p i and advances @p i.
 *
 * Thin wrapper over ICU's U16_NEXT_UNSAFE, which performs neither bounds nor
 * well-formedness checks: @p str must hold valid UTF-16 and @p i must be a
 * code point boundary before the end of the range.
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the next one
 * @param[out] cp receives the decoded code point
 */
template <class Range>
auto valid_u16_advance_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U16_NEXT_UNSAFE(str, i, cp);
}
/**
 * @brief Advances @p i past the code point that starts at index @p i.
 *
 * Thin wrapper over ICU's U16_FWD_1_UNSAFE, which performs neither bounds nor
 * well-formedness checks: @p str must hold valid UTF-16 and @p i must be a
 * code point boundary before the end of the range.
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the next one
 */
template <class Range>
auto valid_u16_advance_index(const Range& str, size_t& i) -> void
{
	U16_FWD_1_UNSAFE(str, i);
}
/**
 * @brief Moves @p i back by one code point and decodes that code point.
 *
 * Thin wrapper over ICU's U16_PREV_UNSAFE, which performs neither bounds nor
 * well-formedness checks: @p str must hold valid UTF-16 and @p i must be a
 * code point boundary greater than zero.
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the previous one
 * @param[out] cp receives the code point that was stepped over
 */
template <class Range>
auto valid_u16_reverse_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U16_PREV_UNSAFE(str, i, cp);
}
/**
 * @brief Moves @p i back to the start of the preceding code point.
 *
 * Thin wrapper over ICU's U16_BACK_1_UNSAFE, which performs neither bounds
 * nor well-formedness checks: @p str must hold valid UTF-16 and @p i must be
 * a code point boundary greater than zero.
 *
 * @param str range of UTF-16 code units
 * @param[in,out] i index of a code point boundary; moved to the previous one
 */
template <class Range>
auto valid_u16_reverse_index(const Range& str, size_t& i) -> void
{
	U16_BACK_1_UNSAFE(str, i);
}
/**
 * @brief Encodes @p cp as UTF-16 into @p buf at index @p i and advances @p i.
 *
 * Thin wrapper over ICU's U16_APPEND_UNSAFE, which checks neither the code
 * point nor the remaining capacity: @p cp must be a valid code point and
 * @p buf must have room for its encoding (at most u16_max_cp_length units).
 *
 * @param buf writable range of UTF-16 code units
 * @param[in,out] i write position; moved past the units written
 * @param cp code point to encode
 */
template <class Range>
auto valid_u16_write_cp_and_advance(Range& buf, size_t& i, char32_t cp) -> void
{
	U16_APPEND_UNSAFE(buf, i, cp);
}
// NOTE(review): the lines below are isolated fragments of one or more type
// definitions (a position type with `begin_i`/`end_i` and, judging by the
// later U8_Encoded_CP(...) calls, the U8_Encoded_CP class). The enclosing
// declarations are not visible in this listing - confirm every remark below
// against the real header.
// `end_i` defaults to `begin_i`.
206 size_t end_i = begin_i;
// Byte storage sized for the longest UTF-8 encoding of one code point.
210 char d[u8_max_cp_length];
// Constructor initializer: the stored size is the width of the position
// range [begin_i, end_i).
215 : sz(pos.end_i - pos.begin_i)
// Tail of a do-while loop that runs until `i` reaches zero or the `max_len`
// counter is exhausted (loop body not visible).
222 }
while (i && --max_len);
// Encodes `cp` into the storage `d` without validity checks; `z` is the
// write index (its declaration is not visible).
227 valid_u8_write_cp_and_advance(d, z, cp);
230 auto size()
const noexcept ->
size_t {
return sz; }
231 auto data()
const noexcept ->
const char* {
return d; }
232 operator std::string_view()
const noexcept
234 return std::string_view(data(), size());
// Const member taking a destination string and an index `j`.
// NOTE(review): the body is not visible in this listing apart from the tail
// of a do-while loop bounded by `i` and `max_len`; presumably it copies the
// stored encoded bytes into `str` starting at index `j` (that is how the swap
// helpers call it) - confirm against the real header.
236 auto copy_to(std::string& str,
size_t j)
const
243 }
while (i && --max_len);
// Swaps two neighbouring encoded code points inside `str`; the visible code
// treats [i2, i3) as the second one.
// NOTE(review): the rest of the signature (the `i3` parameter and the return
// type), the declaration of `cp1` (presumably built from {i1, i2}), the return
// statement and the braces are not visible in this listing.
247auto inline u8_swap_adjacent_cp(std::string& str,
size_t i1,
size_t i2,
// Keep a copy of the second code point's bytes before anything is overwritten.
251 auto cp2 = U8_Encoded_CP(str, {i2, i3});
// After the swap the boundary between the two code points sits right after
// the bytes of cp2.
252 auto new_i2 = i1 + std::size(cp2);
253 cp1.copy_to(str, new_i2);
254 cp2.copy_to(str, i1);
// Swaps the encoded code points located at `pos1` and `pos2` inside `str`,
// in place, and returns {new end index of the first slot, new begin index of
// the second slot}.
// NOTE(review): the arithmetic assumes pos1 lies entirely before pos2
// (pos2.begin_i - pos1.end_i must not underflow) - confirm with callers.
// The braces and whatever makes the unqualified size() call resolve are not
// visible in this listing.
258auto inline u8_swap_cp(std::string& str, U8_CP_Pos pos1, U8_CP_Pos pos2)
259 -> std::pair<size_t, size_t>
// Copy both code points out first; the string is overwritten below.
262 auto cp1 = U8_Encoded_CP(str, pos1);
263 auto cp2 = U8_Encoded_CP(str, pos2);
// The two encodings may differ in length, so the text between them has to
// shift: it will start where cp2 ends in the first slot and stop where cp1
// begins in the second slot.
264 auto new_p1_end_i = pos1.begin_i + size(cp2);
265 auto new_p2_begin_i = pos2.end_i - size(cp1);
// Shift the bytes between the two code points to their new location.
266 std::char_traits<char>::move(&str[new_p1_end_i], &str[pos1.end_i],
267 pos2.begin_i - pos1.end_i);
// Write the saved code points into each other's slot.
268 cp2.copy_to(str, pos1.begin_i);
269 cp1.copy_to(str, new_p2_begin_i);
270 return {new_p1_end_i, new_p2_begin_i};
// Value-returning counterpart of u8_advance_cp(): takes the index by value,
// forwards to the in-place function and has result type Idx_And_Next_CP.
// NOTE(review): the declaration of `cp`, the return statement and the braces
// are not visible in this listing.
292template <
class Range>
293[[nodiscard]]
auto u8_next_cp(
const Range& str,
size_t i) ->
Idx_And_Next_CP
296 u8_advance_cp(str, i, cp);
// Value-returning counterpart of u8_advance_index(): takes the index by value
// and forwards to the in-place function; result type is size_t.
// NOTE(review): the return statement (presumably the advanced `i`) and the
// braces are not visible in this listing.
300template <
class Range>
301[[nodiscard]]
auto u8_next_index(
const Range& str,
size_t i) ->
size_t
303 u8_advance_index(str, i);
// Value-returning counterpart of u8_reverse_cp(): takes the index by value,
// forwards to the in-place function and has result type Idx_And_Prev_CP.
// NOTE(review): the declaration of `cp`, the return statement and the braces
// are not visible in this listing.
307template <
class Range>
308[[nodiscard]]
auto u8_prev_cp(
const Range& str,
size_t i) -> Idx_And_Prev_CP
311 u8_reverse_cp(str, i, cp);
// Value-returning counterpart of u8_reverse_index(): takes the index by value
// and forwards to the in-place function; result type is size_t.
// NOTE(review): the return statement (presumably the moved `i`) and the
// braces are not visible in this listing.
315template <
class Range>
316[[nodiscard]]
auto u8_prev_index(
const Range& str,
size_t i) ->
size_t
318 u8_reverse_index(str, i);
// Value-returning counterpart of u8_write_cp_and_advance(): takes the write
// index by value, forwards to the in-place function and has result type
// Write_CP_Idx_and_Error.
// NOTE(review): the declaration of `err`, the return statement and the braces
// are not visible in this listing.
322template <
class Range>
323[[nodiscard]]
auto u8_write_cp(Range& buf,
size_t i, int32_t cp)
324 -> Write_CP_Idx_and_Error
327 u8_write_cp_and_advance(buf, i, cp, err);
// Value-returning counterpart of valid_u8_advance_cp(): takes the index by
// value and forwards to the unchecked in-place function, so the input must
// already be valid UTF-8.
// NOTE(review): the return type, the declaration of `cp`, the return
// statement and the braces are not visible in this listing.
343template <
class Range>
344[[nodiscard]]
auto valid_u8_next_cp(
const Range& str,
size_t i)
348 valid_u8_advance_cp(str, i, cp);
// Value-returning counterpart of valid_u8_advance_index(): takes the index by
// value and forwards to the unchecked in-place function; result type is
// size_t. The input must already be valid UTF-8.
// NOTE(review): the return statement (presumably the advanced `i`) and the
// braces are not visible in this listing.
352template <
class Range>
353[[nodiscard]]
auto valid_u8_next_index(
const Range& str,
size_t i) ->
size_t
355 valid_u8_advance_index(str, i);
// Value-returning counterpart of valid_u8_reverse_cp(): takes the index by
// value, forwards to the unchecked in-place function and has result type
// Idx_And_Prev_CP_Valid. The input must already be valid UTF-8.
// NOTE(review): the declaration of `cp`, the return statement and the braces
// are not visible in this listing.
359template <
class Range>
360[[nodiscard]]
auto valid_u8_prev_cp(
const Range& str,
size_t i)
361 -> Idx_And_Prev_CP_Valid
364 valid_u8_reverse_cp(str, i, cp);
// Value-returning counterpart of valid_u8_reverse_index(): takes the index by
// value and forwards to the unchecked in-place function; result type is
// size_t. The input must already be valid UTF-8.
// NOTE(review): the return statement (presumably the moved `i`) and the
// braces are not visible in this listing.
368template <
class Range>
369[[nodiscard]]
auto valid_u8_prev_index(
const Range& str,
size_t i) ->
size_t
371 valid_u8_reverse_index(str, i);
// Value-returning counterpart of valid_u8_write_cp_and_advance(): takes the
// write index by value and forwards to the unchecked in-place function;
// result type is size_t.
// NOTE(review): `cp` is int32_t here but the in-place function takes
// char32_t, so the value is implicitly converted at the call - confirm that
// is intended. The return statement and the braces are not visible in this
// listing.
375template <
class Range>
376[[nodiscard]]
auto valid_u8_write_cp(Range& buf,
size_t i, int32_t cp) ->
size_t
378 valid_u8_write_cp_and_advance(buf, i, cp);
Definition: unicode.hxx:209
Library main namespace.
Definition: aff_data.cxx:33
Definition: unicode.hxx:333
Definition: unicode.hxx:277
Definition: unicode.hxx:338
Definition: unicode.hxx:282
Definition: unicode.hxx:204
Definition: unicode.hxx:287