Nuspell
spell checker
utils.hxx
1 /* Copyright 2016-2023 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef NUSPELL_UTILS_HXX
20 #define NUSPELL_UTILS_HXX
21 
22 #include "defines.hxx"
23 #include "nuspell_export.h"
24 
25 #include <string>
26 #include <string_view>
27 #include <vector>
28 
29 #include <unicode/locid.h>
30 
// Branch-hint macros. When __GNUC__ is defined they wrap __builtin_expect,
// with the expression normalized to 0/1 through `!!` and the expected value
// set to 1 (likely) or 0 (unlikely). Otherwise they expand to the bare
// parenthesized expression.
31 #ifdef __GNUC__
32 #define likely(expr) __builtin_expect(!!(expr), 1)
33 #define unlikely(expr) __builtin_expect(!!(expr), 0)
34 #else
35 #define likely(expr) (expr)
36 #define unlikely(expr) (expr)
37 #endif
38 
39 struct UConverter; // unicode/ucnv.h
40 
41 namespace nuspell {
42 NUSPELL_BEGIN_INLINE_NAMESPACE
43 
// Declaration marked NUSPELL_DEPRECATED_EXPORT. Takes the input view `s`, a
// C string `sep` and a vector `out`, and returns a reference to a vector of
// strings.
// NOTE(review): presumably splits `s` at any character contained in `sep`,
// stores the pieces in `out` and returns `out` itself -- the definition is not
// in this header, confirm there.
44 NUSPELL_DEPRECATED_EXPORT auto split_on_any_of(std::string_view s,
45  const char* sep,
46  std::vector<std::string>& out)
47  -> std::vector<std::string>&;
48 
// utf32_to_utf8: two overloads marked NUSPELL_EXPORT. One writes into the
// caller-supplied `out`, the other returns a new std::string.
// NOTE(review): presumably converts UTF-32 text to UTF-8, as the name says.
49 NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in, std::string& out)
50  -> void;
51 NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in) -> std::string;
52 
// valid_utf8_to_32: same two shapes (out-parameter / by-value result), without
// the NUSPELL_EXPORT marker.
// NOTE(review): the "valid_" prefix suggests the input must already be valid
// UTF-8 -- confirm against the definition.
53 auto valid_utf8_to_32(std::string_view in, std::u32string& out) -> void;
54 auto valid_utf8_to_32(std::string_view in) -> std::u32string;
55 
// utf8_to_16: the by-value overload returns std::u16string; the out-parameter
// overload returns bool.
// NOTE(review): presumably the bool reports whether `in` was valid -- confirm.
56 auto utf8_to_16(std::string_view in) -> std::u16string;
57 auto utf8_to_16(std::string_view in, std::u16string& out) -> bool;
58 
// NOTE(review): presumably returns true when `s` is well-formed UTF-8.
59 auto validate_utf8(std::string_view s) -> bool;
60 
// Predicate over a byte string view, marked NUSPELL_EXPORT.
// NOTE(review): presumably true when every byte of `s` is ASCII -- confirm.
61 NUSPELL_EXPORT auto is_all_ascii(std::string_view s) -> bool;
62 
// latin1_to_ucs2: the by-value overload is marked NUSPELL_EXPORT and returns
// std::u16string; the other overload writes into `out` and returns nothing.
// NOTE(review): presumably widens Latin-1 bytes to 16-bit code units.
63 NUSPELL_EXPORT auto latin1_to_ucs2(std::string_view s) -> std::u16string;
64 auto latin1_to_ucs2(std::string_view s, std::u16string& out) -> void;
65 
// Predicate over a UTF-16 view, marked NUSPELL_EXPORT.
// NOTE(review): presumably true when `s` contains only Basic Multilingual
// Plane code units (no surrogates) -- confirm.
66 NUSPELL_EXPORT auto is_all_bmp(std::u16string_view s) -> bool;
67 
// Modifies `s` in place (non-const reference, no return value).
// NOTE(review): presumably upper-cases ASCII letters only -- confirm.
68 auto to_upper_ascii(std::string& s) -> void;
69 
// Value-returning case conversions. All three are [[nodiscard]] and marked
// NUSPELL_EXPORT; each takes the input view and an icu::Locale and returns a
// new std::string.
70 [[nodiscard]] NUSPELL_EXPORT auto to_upper(std::string_view in,
71  const icu::Locale& loc)
72  -> std::string;
73 [[nodiscard]] NUSPELL_EXPORT auto to_title(std::string_view in,
74  const icu::Locale& loc)
75  -> std::string;
76 [[nodiscard]] NUSPELL_EXPORT auto to_lower(std::string_view in,
77  const icu::Locale& loc)
78  -> std::string;
79 
// Out-parameter overloads of the conversions above: the result is written to
// `out` and nothing is returned. to_lower additionally has a UTF-32 overload.
80 auto to_upper(std::string_view in, const icu::Locale& loc, std::string& out)
81  -> void;
82 auto to_title(std::string_view in, const icu::Locale& loc, std::string& out)
83  -> void;
84 auto to_lower(std::u32string_view in, const icu::Locale& loc,
85  std::u32string& out) -> void;
86 auto to_lower(std::string_view in, const icu::Locale& loc, std::string& out)
87  -> void;
// In-place variants taking the string, an index `i` and a locale.
// NOTE(review): presumably they change the case of the character that starts
// at position `i` of `s` -- confirm the unit of `i` in the definition.
88 auto to_lower_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
89 auto to_title_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
90 
// Casing category of a word; this is the type returned by classify_casing().
// The underlying type is char and the enumerators keep their implicit values
// 0..4 in declaration order.
enum class Casing : char {
	SMALL,        // 0
	INIT_CAPITAL, // 1
	ALL_CAPITAL,  // 2
	CAMEL,        // 3 -- NOTE(review): presumably camelCase (mixed, first letter small)
	PASCAL        // 4 -- NOTE(review): presumably PascalCase (mixed, first letter capital)
};
105 
// Maps a string view to a Casing value; marked NUSPELL_EXPORT.
106 NUSPELL_EXPORT auto classify_casing(std::string_view s) -> Casing;
107 
// Predicate over `word` and an index `i`.
// NOTE(review): presumably checks for an uppercase letter next to position `i`,
// treated as a compound-word boundary -- confirm in the definition.
108 auto has_uppercase_at_compound_word_boundary(std::string_view word, size_t i)
109  -> bool;
110 
// Pointer to the converter object (UConverter is only forward-declared in this
// header, from unicode/ucnv.h). Defaults to nullptr, which valid() reports as
// "no converter".
112  UConverter* cnv = nullptr;
113 
114  public:
// Default construction leaves cnv at its default member initializer (nullptr).
115  Encoding_Converter() = default;
// Constructs from an encoding name given as a C string; defined out of line.
// NOTE(review): presumably opens a converter for `enc` -- confirm in the
// implementation file.
116  explicit Encoding_Converter(const char* enc);
// Convenience overload: delegates to the const char* constructor using
// enc.c_str().
117  explicit Encoding_Converter(const std::string& enc)
118  : Encoding_Converter(enc.c_str())
119  {
120  }
// Copy construction is disabled; the type can only be moved.
122  Encoding_Converter(const Encoding_Converter& other) = delete;
123  Encoding_Converter(Encoding_Converter&& other) noexcept
124  {
125  cnv = other.cnv;
126  cnv = nullptr;
127  }
// Copy assignment is disabled; the type can only be move-assigned.
128  auto operator=(const Encoding_Converter& other)
129  -> Encoding_Converter& = delete;
130  auto operator=(Encoding_Converter&& other) noexcept
132  {
133  std::swap(cnv, other.cnv);
134  return *this;
135  }
// Takes the input bytes `in`, writes the result into `out` and returns bool.
// NOTE(review): presumably converts `in` from this converter's encoding to
// UTF-8 and returns false on failure -- confirm in the definition.
136  auto to_utf8(std::string_view in, std::string& out) -> bool;
137  auto valid() -> bool { return cnv != nullptr; }
138 };
139 
// In-place helper on `s` taking two chars.
// NOTE(review): presumably replaces every occurrence of `from` with `to`.
140 auto replace_ascii_char(std::string& s, char from, char to) -> void;
// In-place helper on `s` taking a set of characters as a view.
// NOTE(review): presumably removes from `s` every character listed in
// `erase_chars` -- confirm in the definition.
141 auto erase_chars(std::string& s, std::string_view erase_chars) -> void;
// Predicate marked NUSPELL_EXPORT.
// NOTE(review): the accepted number syntax is not visible from this header.
142 NUSPELL_EXPORT auto is_number(std::string_view s) -> bool;
// Returns a count as size_t (the spelling "appereances" is part of the name).
// NOTE(review): presumably counts the characters of `haystack` that occur in
// `needles` -- confirm in the definition.
143 auto count_appereances_of(std::string_view haystack, std::string_view needles)
144  -> size_t;
145 
// Tells whether `haystack` starts with `needle`.
// An empty `needle` always matches; a `needle` longer than `haystack` never
// does.
auto inline begins_with(std::string_view haystack, std::string_view needle)
    -> bool
{
	// substr clamps the requested length to what is available, so a short
	// haystack simply yields a shorter prefix that compares unequal.
	auto prefix = haystack.substr(0, needle.size());
	return prefix == needle;
}
151 
// Tells whether `haystack` ends with `needle`.
// An empty `needle` always matches; a `needle` longer than `haystack` never
// does.
auto inline ends_with(std::string_view haystack, std::string_view needle)
    -> bool
{
	// Guard first: the subtraction below would wrap around otherwise.
	if (needle.size() > haystack.size())
		return false;
	auto suffix = haystack.substr(haystack.size() - needle.size());
	return suffix == needle;
}
159 
// Returns whatever `x.data()` yields, i.e. a pointer to the first element of
// a container that exposes data().
template <class Container>
auto begin_ptr(Container& x)
{
	auto first = x.data();
	return first;
}
// Returns `x.data() + x.size()`, i.e. the one-past-the-end pointer of a
// container that exposes data() and size().
template <class Container>
auto end_ptr(Container& x)
{
	auto first = x.data();
	auto count = x.size();
	return first + count;
}
170 NUSPELL_END_INLINE_NAMESPACE
171 } // namespace nuspell
172 #endif // NUSPELL_UTILS_HXX
Definition: utils.hxx:111
Library main namespace.
Definition: aff_data.cxx:33