Nuspell
spell checker
utils.hxx
1 /* Copyright 2016-2021 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef NUSPELL_UTILS_HXX
20 #define NUSPELL_UTILS_HXX
21 
22 #include "nuspell_export.h"
23 
24 #include <string>
25 #include <string_view>
26 #include <vector>
27 
28 #include <unicode/locid.h>
29 
/*
 * Branch-prediction hint macros.
 *
 * When __GNUC__ is defined the condition is normalised to 0/1 with `!!` and
 * passed to __builtin_expect together with the expected outcome. Otherwise
 * both macros expand to the plain parenthesised expression.
 */
#ifndef __GNUC__
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#else
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#endif
37 
38 struct UConverter; // unicode/ucnv.h
39 
40 namespace nuspell {
41 inline namespace v5 {
42 
43 auto split(std::string_view s, char sep, std::vector<std::string>& out)
44  -> std::vector<std::string>&;
45 NUSPELL_EXPORT auto split_on_any_of(std::string_view s, const char* sep,
46  std::vector<std::string>& out)
47  -> std::vector<std::string>&;
48 
49 NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in, std::string& out)
50  -> void;
51 NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in) -> std::string;
52 
/**
 * @brief UTF-8 decoding to UTF-32 (declarations only; defined elsewhere).
 *
 * One overload returns a new std::u32string, the other fills @p out.
 * NOTE(review): the name suggests the input must already be valid UTF-8 -
 * confirm the precondition against the definition.
 */
auto valid_utf8_to_32(std::string_view in) -> std::u32string;
auto valid_utf8_to_32(std::string_view in, std::u32string& out) -> void;

/**
 * @brief UTF-8 to UTF-16 conversion (declarations only; defined elsewhere).
 *
 * The out-parameter overload returns bool.
 * NOTE(review): presumably the bool reports whether the input was valid -
 * confirm.
 */
auto utf8_to_16(std::string_view in, std::u16string& out) -> bool;
auto utf8_to_16(std::string_view in) -> std::u16string;

/**
 * @brief Predicate on a byte string (declaration only; defined elsewhere).
 * NOTE(review): by its name, reports whether @p s is well-formed UTF-8.
 */
auto validate_utf8(std::string_view s) -> bool;
60 
61 NUSPELL_EXPORT auto is_all_ascii(std::string_view s) -> bool;
62 
63 NUSPELL_EXPORT auto latin1_to_ucs2(std::string_view s) -> std::u16string;
64 auto latin1_to_ucs2(std::string_view s, std::u16string& out) -> void;
65 
66 NUSPELL_EXPORT auto is_all_bmp(std::u16string_view s) -> bool;
67 
/**
 * @brief In-place modification of @p s (declaration only; defined elsewhere).
 * NOTE(review): by its name, upper-cases the ASCII letters of @p s; treatment
 * of non-ASCII bytes is not visible from here.
 */
auto to_upper_ascii(std::string& s) -> void;
69 
70 [[nodiscard]] NUSPELL_EXPORT auto to_upper(std::string_view in,
71  const icu::Locale& loc)
72  -> std::string;
73 [[nodiscard]] NUSPELL_EXPORT auto to_title(std::string_view in,
74  const icu::Locale& loc)
75  -> std::string;
76 [[nodiscard]] NUSPELL_EXPORT auto to_lower(std::string_view in,
77  const icu::Locale& loc)
78  -> std::string;
79 
80 auto to_upper(std::string_view in, const icu::Locale& loc, std::string& out)
81  -> void;
82 auto to_title(std::string_view in, const icu::Locale& loc, std::string& out)
83  -> void;
84 auto to_lower(std::u32string_view in, const icu::Locale& loc,
85  std::u32string& out) -> void;
86 auto to_lower(std::string_view in, const icu::Locale& loc, std::string& out)
87  -> void;
88 auto to_lower_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
89 auto to_title_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
90 
/**
 * @brief Casing category of a word; the return type of classify_casing().
 *
 * The underlying type is char and the enumerators take the values 0..4 in
 * declaration order.
 *
 * NOTE(review): the per-enumerator remarks below are inferred from the names
 * only - confirm against classify_casing().
 */
enum class Casing : char {
	SMALL = 0,        // presumably: no upper-case letters
	INIT_CAPITAL = 1, // presumably: only the first letter is upper case
	ALL_CAPITAL = 2,  // presumably: every letter is upper case
	CAMEL = 3,        // presumably: mixed case starting lower case
	PASCAL = 4        // presumably: mixed case starting upper case
};
105 
106 NUSPELL_EXPORT auto classify_casing(std::string_view s) -> Casing;
107 
108 auto has_uppercase_at_compound_word_boundary(std::string_view word, size_t i)
109  -> bool;
110 
112  UConverter* cnv = nullptr;
113 
114  public:
115  Encoding_Converter() = default;
116  explicit Encoding_Converter(const char* enc);
117  explicit Encoding_Converter(const std::string& enc)
118  : Encoding_Converter(enc.c_str())
119  {
120  }
123  Encoding_Converter(Encoding_Converter&& other) noexcept
124  {
125  cnv = other.cnv;
126  cnv = nullptr;
127  }
128  auto operator=(const Encoding_Converter& other) -> Encoding_Converter&;
129  auto operator=(Encoding_Converter&& other) noexcept
131  {
132  std::swap(cnv, other.cnv);
133  return *this;
134  }
135  auto to_utf8(std::string_view in, std::string& out) -> bool;
136  auto valid() -> bool { return cnv != nullptr; }
137 };
138 
139 auto replace_ascii_char(std::string& s, char from, char to) -> void;
140 auto erase_chars(std::string& s, std::string_view erase_chars) -> void;
141 NUSPELL_EXPORT auto is_number(std::string_view s) -> bool;
142 auto count_appereances_of(std::string_view haystack, std::string_view needles)
143  -> size_t;
144 
/**
 * @brief Tests whether @p haystack starts with @p needle.
 *
 * An empty @p needle matches every haystack. A needle longer than the
 * haystack never matches.
 */
auto inline begins_with(std::string_view haystack, std::string_view needle)
    -> bool
{
	// substr() clamps the length, so a short haystack yields a shorter
	// prefix that cannot compare equal to the needle.
	const auto prefix = haystack.substr(0, needle.size());
	return prefix == needle;
}
150 
/**
 * @brief Tests whether @p haystack ends with @p needle.
 *
 * An empty @p needle matches every haystack. A needle longer than the
 * haystack never matches.
 */
auto inline ends_with(std::string_view haystack, std::string_view needle)
    -> bool
{
	if (needle.size() > haystack.size())
		return false;
	// Drop everything before the candidate suffix, then compare it whole.
	haystack.remove_prefix(haystack.size() - needle.size());
	return haystack == needle;
}
158 
/**
 * @brief Pointer to the first element of a contiguous container.
 * @param x any object providing `data()`.
 * @return whatever `x.data()` returns.
 */
template <class Container>
auto begin_ptr(Container& x)
{
	auto first = x.data();
	return first;
}
/**
 * @brief One-past-the-end pointer of a contiguous container.
 * @param x any object providing `data()` and `size()`.
 * @return `x.data()` advanced by `x.size()` elements.
 */
template <class Container>
auto end_ptr(Container& x)
{
	auto first = x.data();
	const auto count = x.size();
	return first + count;
}
169 } // namespace v5
170 } // namespace nuspell
171 #endif // NUSPELL_UTILS_HXX
Definition: utils.hxx:111
Library main namespace.
Definition: aff_data.cxx:33