18 #ifndef NUSPELL_UNICODE_HXX
19 #define NUSPELL_UNICODE_HXX
20 #include "defines.hxx"
23 #include <string_view>
24 #include <unicode/utf16.h>
25 #include <unicode/utf8.h>
28 NUSPELL_BEGIN_INLINE_NAMESPACE
32 inline constexpr
auto u8_max_cp_length = U8_MAX_LENGTH;
/**
 * @brief Tells whether a decoded value signals a UTF-8 decoding error.
 *
 * The checked (non "valid_") UTF-8 decoding functions rely on ICU macros that
 * report ill-formed input by producing a negative code point.
 *
 * Usable in constant expressions and never throws.
 *
 * @param cp value produced by a checked UTF-8 decoding function
 * @return true if cp is negative, i.e. not a code point
 */
constexpr auto u8_is_cp_error(int32_t cp) noexcept -> bool
{
	return cp < 0;
}
// Decodes the code point of `str` found at index `i` with ICU's checked
// U8_NEXT macro, storing the result in `cp`.
//
// NOTE(review): this listing is missing lines of the body (the declaration
// of `idx`, the #else/#endif of the version switch, the declaration of `len`
// in the second branch and the closing brace). Presumably `i` ends up at the
// index following the decoded code point in both branches - confirm against
// the complete file.
36 template <
class Range>
37 auto u8_advance_cp(
const Range& str,
size_t& i, int32_t& cp) ->
void
39 using std::size, std::data;
// Branch taken when building against ICU 60 or older: the macro is fed a raw
// pointer and int32_t index/length instead of the range and size_t index.
40 #if U_ICU_VERSION_MAJOR_NUM <= 60
41 auto s_ptr = data(str);
43 int32_t len = size(str);
44 U8_NEXT(s_ptr, idx, len, cp);
// Other branch (its #else is not visible here): the range and the size_t
// index are passed to the macro directly.
48 U8_NEXT(str, i, len, cp);
// Moves index `i` forward over one code point of `str` using ICU's checked
// U8_FWD_1 macro, without decoding the code point.
//
// NOTE(review): the lines declaring `len` and the body braces are missing
// from this listing; presumably `len` is the size of `str` - confirm.
52 template <
class Range>
53 auto u8_advance_index(
const Range& str,
size_t& i) ->
void
57 U8_FWD_1(str, i, len);
// Steps backwards over one code point of `str` with ICU's checked U8_PREV
// macro (lower bound 0) and stores the decoded value in `cp`.
//
// NOTE(review): the declarations of `ptr` and `idx`, any write-back to `i`
// and the body braces are missing from this listing. Presumably `ptr` is the
// data pointer of `str` and `idx` a copy of `i` that is stored back after
// the macro - confirm against the complete file.
60 template <
class Range>
61 auto u8_reverse_cp(
const Range& str,
size_t& i, int32_t& cp) ->
void
63 using std::size, std::data;
66 U8_PREV(ptr, 0, idx, cp);
// Steps backwards over one code point of `str` with ICU's checked U8_BACK_1
// macro (lower bound 0), without decoding the code point.
//
// NOTE(review): the declarations of `ptr` and `idx`, any write-back to `i`
// and the body braces are missing from this listing. Presumably `ptr` is the
// data pointer of `str` and `idx` a copy of `i` that is stored back after
// the macro - confirm against the complete file.
70 template <
class Range>
71 auto u8_reverse_index(
const Range& str,
size_t& i) ->
void
73 using std::size, std::data;
76 U8_BACK_1(ptr, 0, idx);
// Encodes code point `cp` as UTF-8 into `buf` at index `i` using ICU's
// checked U8_APPEND macro; `error` receives the macro's error flag.
//
// NOTE(review): the return type, the declaration of `idx`, the #else/#endif
// of the version switch, the declaration of `len` in the second branch and
// the body braces are missing from this listing. Presumably `i` ends up just
// past the written bytes in both branches - confirm.
80 template <
class Range>
81 auto u8_write_cp_and_advance(Range& buf,
size_t& i, int32_t cp,
bool& error)
84 using std::size, std::data;
// Branch taken when building against ICU 60 or older: int32_t index and
// length are handed to the macro.
85 #if U_ICU_VERSION_MAJOR_NUM <= 60
88 int32_t len = size(buf);
89 U8_APPEND(buf, idx, len, cp, error);
// Other branch (its #else is not visible here): the size_t index is passed
// to the macro directly.
93 U8_APPEND(buf, i, len, cp, error);
/**
 * @brief Decodes the code point that starts at index i and advances i past
 * it. For well-formed UTF-8 only.
 *
 * Wraps ICU's U8_NEXT_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-8 text
 * @param[in,out] i index of the first code unit of a code point; updated to
 * the index of the following code point
 * @param[out] cp the decoded code point
 */
template <class Range>
auto valid_u8_advance_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U8_NEXT_UNSAFE(str, i, cp);
}
/**
 * @brief Advances i from one code point boundary to the next without
 * decoding. For well-formed UTF-8 only.
 *
 * Wraps ICU's U8_FWD_1_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-8 text
 * @param[in,out] i index of the first code unit of a code point; updated to
 * the index of the following code point
 */
template <class Range>
auto valid_u8_advance_index(const Range& str, size_t& i) -> void
{
	U8_FWD_1_UNSAFE(str, i);
}
/**
 * @brief Moves i back to the start of the preceding code point and decodes
 * that code point. For well-formed UTF-8 only.
 *
 * Wraps ICU's U8_PREV_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-8 text
 * @param[in,out] i index just past a code point; updated to the index where
 * that code point starts
 * @param[out] cp the decoded code point
 */
template <class Range>
auto valid_u8_reverse_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U8_PREV_UNSAFE(str, i, cp);
}
/**
 * @brief Moves i back to the start of the preceding code point without
 * decoding. For well-formed UTF-8 only.
 *
 * Wraps ICU's U8_BACK_1_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-8 text
 * @param[in,out] i index just past a code point; updated to the index where
 * that code point starts
 */
template <class Range>
auto valid_u8_reverse_index(const Range& str, size_t& i) -> void
{
	U8_BACK_1_UNSAFE(str, i);
}
/**
 * @brief Writes the UTF-8 encoding of cp into buf at index i and advances i
 * past the written bytes.
 *
 * Wraps ICU's U8_APPEND_UNSAFE: the code point must be valid and the buffer
 * must have room for it (up to u8_max_cp_length bytes), nothing is checked.
 *
 * @param buf destination buffer
 * @param[in,out] i index to write at; updated to the index just past the
 * encoded code point
 * @param cp valid code point to encode
 */
template <class Range>
auto valid_u8_write_cp_and_advance(Range& buf, size_t& i, char32_t cp) -> void
{
	U8_APPEND_UNSAFE(buf, i, cp);
}
131 inline constexpr
auto u16_max_cp_length = U16_MAX_LENGTH;
133 auto inline u16_is_cp_error(int32_t cp) ->
bool {
return U_IS_SURROGATE(cp); }
/**
 * @brief Decodes the code point that starts at index i and advances i past
 * it. Safe on ill-formed UTF-16.
 *
 * Wraps ICU's U16_NEXT, bounded by the size of str. An unpaired surrogate is
 * returned as is in cp; detect it with u16_is_cp_error().
 *
 * @param str UTF-16 text, possibly ill-formed
 * @param[in,out] i index to decode at, must be less than the size of str;
 * updated to the index of the following code point
 * @param[out] cp the decoded code point, or an unpaired surrogate
 */
template <class Range>
auto u16_advance_cp(const Range& str, size_t& i, int32_t& cp) -> void
{
	using std::size;
	auto len = size(str);
	U16_NEXT(str, i, len, cp);
}
/**
 * @brief Advances i over one code point without decoding it. Safe on
 * ill-formed UTF-16.
 *
 * Wraps ICU's U16_FWD_1, bounded by the size of str.
 *
 * @param str UTF-16 text, possibly ill-formed
 * @param[in,out] i index to start from, must be less than the size of str;
 * updated to the index of the following code point
 */
template <class Range>
auto u16_advance_index(const Range& str, size_t& i) -> void
{
	using std::size;
	auto len = size(str);
	U16_FWD_1(str, i, len);
}
/**
 * @brief Moves i back over one code point and decodes it. Safe on
 * ill-formed UTF-16.
 *
 * Wraps ICU's U16_PREV with index 0 as the lower bound. An unpaired
 * surrogate is returned as is in cp; detect it with u16_is_cp_error().
 *
 * @param str UTF-16 text, possibly ill-formed
 * @param[in,out] i index just past the code point to read, must be greater
 * than 0; updated to the index where that code point starts
 * @param[out] cp the decoded code point, or an unpaired surrogate
 */
template <class Range>
auto u16_reverse_cp(const Range& str, size_t& i, int32_t& cp) -> void
{
	U16_PREV(str, 0, i, cp);
}
/**
 * @brief Moves i back over one code point without decoding it. Safe on
 * ill-formed UTF-16.
 *
 * Wraps ICU's U16_BACK_1 with index 0 as the lower bound.
 *
 * @param str UTF-16 text, possibly ill-formed
 * @param[in,out] i index just past a code point, must be greater than 0;
 * updated to the index where that code point starts
 */
template <class Range>
auto u16_reverse_index(const Range& str, size_t& i) -> void
{
	U16_BACK_1(str, 0, i);
}
/**
 * @brief Writes the UTF-16 encoding of cp into buf at index i and advances i
 * past the written code units, with checks.
 *
 * Wraps ICU's U16_APPEND, bounded by the size of buf. The macro raises the
 * error flag when cp is not a valid code point or when the encoding does not
 * fit in the remaining space.
 *
 * @param buf destination buffer
 * @param[in,out] i index to write at; updated to the index just past the
 * encoded code point
 * @param cp code point to encode
 * @param[out] error set to true by the macro on failure
 */
template <class Range>
auto u16_write_cp_and_advance(Range& buf, size_t& i, int32_t cp, bool& error)
    -> void
{
	using std::size;
	auto len = size(buf);
	U16_APPEND(buf, i, len, cp, error);
}
/**
 * @brief Decodes the code point that starts at index i and advances i past
 * it. For well-formed UTF-16 only.
 *
 * Wraps ICU's U16_NEXT_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-16 text
 * @param[in,out] i index of the first code unit of a code point; updated to
 * the index of the following code point
 * @param[out] cp the decoded code point
 */
template <class Range>
auto valid_u16_advance_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U16_NEXT_UNSAFE(str, i, cp);
}
/**
 * @brief Advances i from one code point boundary to the next without
 * decoding. For well-formed UTF-16 only.
 *
 * Wraps ICU's U16_FWD_1_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-16 text
 * @param[in,out] i index of the first code unit of a code point; updated to
 * the index of the following code point
 */
template <class Range>
auto valid_u16_advance_index(const Range& str, size_t& i) -> void
{
	U16_FWD_1_UNSAFE(str, i);
}
/**
 * @brief Moves i back to the start of the preceding code point and decodes
 * that code point. For well-formed UTF-16 only.
 *
 * Wraps ICU's U16_PREV_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-16 text
 * @param[in,out] i index just past a code point; updated to the index where
 * that code point starts
 * @param[out] cp the decoded code point
 */
template <class Range>
auto valid_u16_reverse_cp(const Range& str, size_t& i, char32_t& cp) -> void
{
	U16_PREV_UNSAFE(str, i, cp);
}
/**
 * @brief Moves i back to the start of the preceding code point without
 * decoding. For well-formed UTF-16 only.
 *
 * Wraps ICU's U16_BACK_1_UNSAFE, which performs no validation and no bounds
 * checking.
 *
 * @param str well-formed UTF-16 text
 * @param[in,out] i index just past a code point; updated to the index where
 * that code point starts
 */
template <class Range>
auto valid_u16_reverse_index(const Range& str, size_t& i) -> void
{
	U16_BACK_1_UNSAFE(str, i);
}
/**
 * @brief Writes the UTF-16 encoding of cp into buf at index i and advances i
 * past the written code units.
 *
 * Wraps ICU's U16_APPEND_UNSAFE: the code point must be valid and the buffer
 * must have room for it (up to u16_max_cp_length code units), nothing is
 * checked.
 *
 * @param buf destination buffer
 * @param[in,out] i index to write at; updated to the index just past the
 * encoded code point
 * @param cp valid code point to encode
 */
template <class Range>
auto valid_u16_write_cp_and_advance(Range& buf, size_t& i, char32_t cp) -> void
{
	U16_APPEND_UNSAFE(buf, i, cp);
}
// NOTE(review): the lines below are fragments of two types whose headers,
// constructors and loop bodies are missing from this listing. Comments only
// describe what the visible lines show.

// Member of the position type (used elsewhere in this file as U8_CP_Pos with
// fields begin_i and end_i): end_i defaults to begin_i.
208 size_t end_i = begin_i;
// Storage of the encoded code point type (U8_Encoded_CP): a fixed buffer
// large enough for any single UTF-8 encoded code point.
212 char d[u8_max_cp_length];
// Constructor initializer: the stored length is the width of the given
// position range.
217 : sz(pos.end_i - pos.begin_i)
// Tail of a do-while loop whose body is not visible; it stops when i reaches
// 0 or after max_len iterations.
224 }
while (i && --max_len);
// Encodes cp into the internal buffer at index z (well-formed, unchecked
// variant).
229 valid_u8_write_cp_and_advance(d, z, cp);
// Number of bytes held.
232 auto size()
const noexcept ->
size_t {
return sz; }
// Pointer to the first held byte.
233 auto data()
const noexcept ->
const char* {
return d; }
// Implicit, non-owning view of the held bytes; the view refers to the
// internal buffer of this object.
234 operator std::string_view()
const noexcept
236 return std::string_view(data(), size());
// Writes into str at index j; the body is not visible here, presumably it
// copies the held bytes - confirm against the complete file.
238 auto copy_to(std::string& str,
size_t j)
const
// Tail of a do-while loop whose body is not visible; same stop condition as
// the loop above.
245 }
while (i && --max_len);
// Swaps two neighbouring encoded code points inside str, in place.
//
// NOTE(review): the rest of the parameter list, the return type, the
// construction of cp1, any return statement and the body braces are missing
// from this listing. Presumably the first code point spans [i1, i2) and the
// second [i2, i3), and cp1 is a copy of the first one - confirm.
249 auto inline u8_swap_adjacent_cp(std::string& str,
size_t i1,
size_t i2,
// Copy of the second code point, taken before its bytes are overwritten.
253 auto cp2 = U8_Encoded_CP(str, {i2, i3});
// After the swap the second code point comes first, so the boundary between
// the two moves to i1 plus the length of the second code point.
254 auto new_i2 = i1 + std::size(cp2);
255 cp1.copy_to(str, new_i2);
256 cp2.copy_to(str, i1);
260 auto inline u8_swap_cp(std::string& str, U8_CP_Pos pos1, U8_CP_Pos pos2)
261 -> std::pair<size_t, size_t>
264 auto cp1 = U8_Encoded_CP(str, pos1);
265 auto cp2 = U8_Encoded_CP(str, pos2);
266 auto new_p1_end_i = pos1.begin_i + size(cp2);
267 auto new_p2_begin_i = pos2.end_i - size(cp1);
268 std::char_traits<char>::move(&str[new_p1_end_i], &str[pos1.end_i],
269 pos2.begin_i - pos1.end_i);
270 cp2.copy_to(str, pos1.begin_i);
271 cp1.copy_to(str, new_p2_begin_i);
272 return {new_p1_end_i, new_p2_begin_i};
// Value-returning counterpart of u8_advance_cp(): takes the index by value
// and reports the outcome through an Idx_And_Next_CP result.
//
// NOTE(review): the declaration of `cp`, the return statement and the body
// braces are missing from this listing; presumably the result carries the
// advanced index and the decoded code point - confirm.
294 template <
class Range>
295 [[nodiscard]]
auto u8_next_cp(
const Range& str,
size_t i) ->
Idx_And_Next_CP
298 u8_advance_cp(str, i, cp);
/**
 * @brief Value-returning counterpart of u8_advance_index().
 *
 * @param str UTF-8 text, possibly ill-formed
 * @param i index to start from
 * @return i after it has been advanced by u8_advance_index()
 */
template <class Range>
[[nodiscard]] auto u8_next_index(const Range& str, size_t i) -> size_t
{
	u8_advance_index(str, i);
	return i;
}
// Value-returning counterpart of u8_reverse_cp(): takes the index by value
// and reports the outcome through an Idx_And_Prev_CP result.
//
// NOTE(review): the declaration of `cp`, the return statement and the body
// braces are missing from this listing; presumably the result carries the
// moved index and the decoded code point - confirm.
309 template <
class Range>
310 [[nodiscard]]
auto u8_prev_cp(
const Range& str,
size_t i) -> Idx_And_Prev_CP
313 u8_reverse_cp(str, i, cp);
/**
 * @brief Value-returning counterpart of u8_reverse_index().
 *
 * @param str UTF-8 text, possibly ill-formed
 * @param i index to start from
 * @return i after it has been moved back by u8_reverse_index()
 */
template <class Range>
[[nodiscard]] auto u8_prev_index(const Range& str, size_t i) -> size_t
{
	u8_reverse_index(str, i);
	return i;
}
// Value-returning counterpart of u8_write_cp_and_advance(): takes the index
// by value and reports the outcome through a Write_CP_Idx_and_Error result.
//
// NOTE(review): the declaration of `err`, the return statement and the body
// braces are missing from this listing; presumably the result carries the
// advanced index and the error flag - confirm.
324 template <
class Range>
325 [[nodiscard]]
auto u8_write_cp(Range& buf,
size_t i, int32_t cp)
326 -> Write_CP_Idx_and_Error
329 u8_write_cp_and_advance(buf, i, cp, err);
// Value-returning counterpart of valid_u8_advance_cp(), for well-formed
// UTF-8: takes the index by value.
//
// NOTE(review): the return type, the declaration of `cp`, the return
// statement and the body braces are missing from this listing; presumably
// the result carries the advanced index and the decoded code point -
// confirm.
345 template <
class Range>
346 [[nodiscard]]
auto valid_u8_next_cp(
const Range& str,
size_t i)
350 valid_u8_advance_cp(str, i, cp);
/**
 * @brief Value-returning counterpart of valid_u8_advance_index().
 *
 * @param str well-formed UTF-8 text
 * @param i index of the first code unit of a code point
 * @return index of the following code point
 */
template <class Range>
[[nodiscard]] auto valid_u8_next_index(const Range& str, size_t i) -> size_t
{
	valid_u8_advance_index(str, i);
	return i;
}
// Value-returning counterpart of valid_u8_reverse_cp(), for well-formed
// UTF-8: takes the index by value and reports the outcome through an
// Idx_And_Prev_CP_Valid result.
//
// NOTE(review): the declaration of `cp`, the return statement and the body
// braces are missing from this listing; presumably the result carries the
// moved index and the decoded code point - confirm.
361 template <
class Range>
362 [[nodiscard]]
auto valid_u8_prev_cp(
const Range& str,
size_t i)
363 -> Idx_And_Prev_CP_Valid
366 valid_u8_reverse_cp(str, i, cp);
/**
 * @brief Value-returning counterpart of valid_u8_reverse_index().
 *
 * @param str well-formed UTF-8 text
 * @param i index just past a code point
 * @return index where that code point starts
 */
template <class Range>
[[nodiscard]] auto valid_u8_prev_index(const Range& str, size_t i) -> size_t
{
	valid_u8_reverse_index(str, i);
	return i;
}
/**
 * @brief Value-returning counterpart of valid_u8_write_cp_and_advance().
 *
 * Nothing is checked: cp must be a valid code point and buf must have room
 * for its encoding.
 *
 * @param buf destination buffer
 * @param i index to write at
 * @param cp valid code point to encode; converted to char32_t for the
 * underlying call
 * @return index just past the encoded code point
 */
template <class Range>
[[nodiscard]] auto valid_u8_write_cp(Range& buf, size_t i, int32_t cp) -> size_t
{
	valid_u8_write_cp_and_advance(buf, i, cp);
	return i;
}
383 NUSPELL_END_INLINE_NAMESPACE
Definition: unicode.hxx:211
Library main namespace.
Definition: aff_data.cxx:33
Definition: unicode.hxx:335
Definition: unicode.hxx:279
Definition: unicode.hxx:340
Definition: unicode.hxx:284
Definition: unicode.hxx:206
Definition: unicode.hxx:289