nuspell 5.1.6
Nuspell spellchecking library
Loading...
Searching...
No Matches
utils.hxx
1/* Copyright 2016-2024 Dimitrij Mijoski
2 *
3 * This file is part of Nuspell.
4 *
5 * Nuspell is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * Nuspell is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef NUSPELL_UTILS_HXX
20#define NUSPELL_UTILS_HXX
21
22#include "defines.hxx"
23#include "nuspell_export.h"
24
25#include <string>
26#include <string_view>
27#include <vector>
28
29#include <unicode/locid.h>
30
// Branch-prediction hint macros.
// When __GNUC__ is defined they forward to __builtin_expect, with the
// condition collapsed to 0/1 by the double negation; otherwise they expand to
// the parenthesised condition and carry no hint.
#if defined(__GNUC__)
#define likely(cond) __builtin_expect(!!(cond), 1)
#define unlikely(cond) __builtin_expect(!!(cond), 0)
#else
#define likely(cond) (cond)
#define unlikely(cond) (cond)
#endif
38
39struct UConverter; // unicode/ucnv.h
40
41namespace nuspell {
42NUSPELL_BEGIN_INLINE_NAMESPACE
43
/**
 * Declaration only; the definition is not part of this header.
 *
 * Takes the text `s`, a C string `sep` and a caller-supplied vector `out`,
 * and returns a reference to a vector of strings. Judging by the name it
 * presumably splits `s` at any character contained in `sep`, stores the
 * pieces in `out` and returns `out` -- TODO confirm against the definition.
 *
 * NOTE(review): the NUSPELL_DEPRECATED_EXPORT prefix suggests the symbol is
 * exported but deprecated; confirm in nuspell_export.h.
 */
NUSPELL_DEPRECATED_EXPORT auto split_on_any_of(std::string_view s,
                                               const char* sep,
                                               std::vector<std::string>& out)
    -> std::vector<std::string>&;
48
// Text-encoding helpers. Only the declarations are visible in this header, so
// the behavioural notes below are inferred from names and signatures and
// should be verified against the definitions.

// Presumably encodes the UTF-32 text `in` as UTF-8 and stores it in `out`.
NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in, std::string& out)
    -> void;
// Overload that returns the result by value instead of using an out-parameter.
NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in) -> std::string;

// Presumably decodes UTF-8 `in` to UTF-32, either into `out` or by value.
// NOTE(review): the "valid_" prefix suggests the input is expected to be
// well-formed UTF-8 already -- confirm before passing unchecked data.
auto valid_utf8_to_32(std::string_view in, std::u32string& out) -> void;
auto valid_utf8_to_32(std::string_view in) -> std::u32string;

// Presumably converts UTF-8 `in` to UTF-16. The out-parameter overload returns
// a bool; it looks like a success/validity flag -- TODO confirm its meaning.
auto utf8_to_16(std::string_view in) -> std::u16string;
auto utf8_to_16(std::string_view in, std::u16string& out) -> bool;

// Presumably reports whether `s` is well-formed UTF-8.
auto validate_utf8(std::string_view s) -> bool;
60
// Declarations only; descriptions are inferred from the names and should be
// checked against the definitions.

// Presumably true when every char of `s` is in the ASCII range.
NUSPELL_EXPORT auto is_all_ascii(std::string_view s) -> bool;

// Presumably widens Latin-1 (ISO-8859-1) bytes to 16-bit code units; one
// overload returns the result, the other writes it into `out`.
NUSPELL_EXPORT auto latin1_to_ucs2(std::string_view s) -> std::u16string;
auto latin1_to_ucs2(std::string_view s, std::u16string& out) -> void;

// Presumably true when `s` contains only Basic Multilingual Plane code points
// (i.e. no surrogates) -- TODO confirm.
NUSPELL_EXPORT auto is_all_bmp(std::u16string_view s) -> bool;

// Takes `s` by non-const reference and returns void, so any change is made in
// place; by its name it upper-cases ASCII letters -- TODO confirm.
auto to_upper_ascii(std::string& s) -> void;
69
// Locale-aware case conversion (declarations only; defined elsewhere).
// The names suggest upper-, title- and lower-casing using the rules of the
// ICU locale `loc`; the encoding of the std::string overloads is not visible
// here (presumably UTF-8 -- TODO confirm).

// Value-returning overloads. They are [[nodiscard]]: the converted text is
// only available through the return value.
[[nodiscard]] NUSPELL_EXPORT auto to_upper(std::string_view in,
                                           const icu::Locale& loc)
    -> std::string;
[[nodiscard]] NUSPELL_EXPORT auto to_title(std::string_view in,
                                           const icu::Locale& loc)
    -> std::string;
[[nodiscard]] NUSPELL_EXPORT auto to_lower(std::string_view in,
                                           const icu::Locale& loc)
    -> std::string;

// Out-parameter overloads: same inputs, result delivered through `out`.
auto to_upper(std::string_view in, const icu::Locale& loc, std::string& out)
    -> void;
auto to_title(std::string_view in, const icu::Locale& loc, std::string& out)
    -> void;
// UTF-32 flavour of to_lower (takes and fills char32_t strings).
auto to_lower(std::u32string_view in, const icu::Locale& loc,
              std::u32string& out) -> void;
auto to_lower(std::string_view in, const icu::Locale& loc, std::string& out)
    -> void;
// In-place variants operating on `s` at index `i`. NOTE(review): presumably
// they change the case of the single character starting at `i`; whether `i`
// is a byte offset or a code-point index is not visible here -- confirm.
auto to_lower_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
auto to_title_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
90
/**
 * Casing category of a word; this is the return type of classify_casing().
 *
 * The underlying type is `char`. The per-enumerator notes are read off the
 * enumerator names; the exact classification rules live with
 * classify_casing(), which is not defined in this header.
 */
enum class Casing : char {
    SMALL,        // presumably: no capital letters
    INIT_CAPITAL, // presumably: only the first letter is capital
    ALL_CAPITAL,  // presumably: every letter is capital
    CAMEL,        // presumably camelCase: mixed case, first letter small
    PASCAL        // presumably PascalCase: mixed case, first letter capital
};
105
// Returns the Casing category of `s`. Declaration only; the classification
// rules are in the definition, which is not part of this header.
NUSPELL_EXPORT auto classify_casing(std::string_view s) -> Casing;

// Declaration only. By its name, presumably reports whether `word` has an
// uppercase letter adjacent to position `i`, where `i` marks a boundary
// between compound parts -- TODO confirm the exact rule and the unit of `i`.
auto has_uppercase_at_compound_word_boundary(std::string_view word, size_t i)
    -> bool;
110
111class Encoding_Converter {
112 UConverter* cnv = nullptr;
113
114 public:
115 Encoding_Converter() = default;
116 explicit Encoding_Converter(const char* enc);
117 explicit Encoding_Converter(const std::string& enc)
118 : Encoding_Converter(enc.c_str())
119 {
120 }
121 ~Encoding_Converter();
122 Encoding_Converter(const Encoding_Converter& other) = delete;
123 Encoding_Converter(Encoding_Converter&& other) noexcept
124 {
125 cnv = other.cnv;
126 cnv = nullptr;
127 }
128 auto operator=(const Encoding_Converter& other)
129 -> Encoding_Converter& = delete;
130 auto operator=(Encoding_Converter&& other) noexcept
131 -> Encoding_Converter&
132 {
133 std::swap(cnv, other.cnv);
134 return *this;
135 }
136 auto to_utf8(std::string_view in, std::string& out) -> bool;
137 auto valid() -> bool { return cnv != nullptr; }
138};
139
// Small string utilities (declarations only; defined elsewhere). The notes
// are inferred from names and signatures -- verify against the definitions.

// Modifies `s` in place; presumably replaces every `from` with `to`.
auto replace_ascii_char(std::string& s, char from, char to) -> void;
// Modifies `s` in place; presumably removes every character that occurs in
// `erase_chars`.
auto erase_chars(std::string& s, std::string_view erase_chars) -> void;
// Presumably true when `s` looks like a number; the accepted syntax is not
// visible here.
NUSPELL_EXPORT auto is_number(std::string_view s) -> bool;
// Presumably counts how many characters of `haystack` are among `needles`.
// (The spelling "appereances" is part of the existing function name.)
auto count_appereances_of(std::string_view haystack, std::string_view needles)
    -> size_t;
145
/**
 * Tells whether `haystack` starts with `needle`.
 *
 * An empty `needle` matches every haystack. A haystack shorter than the
 * needle never matches.
 */
auto inline begins_with(std::string_view haystack, std::string_view needle)
    -> bool
{
    // substr() clamps the length to what is available, so a too-short
    // haystack yields a shorter view that simply compares unequal.
    auto const head = haystack.substr(0, needle.size());
    return head == needle;
}
151
/**
 * Tells whether `haystack` finishes with `needle`.
 *
 * An empty `needle` matches every haystack. A haystack shorter than the
 * needle never matches.
 */
auto inline ends_with(std::string_view haystack, std::string_view needle)
    -> bool
{
    // Guard first: the offset computed below would wrap around otherwise.
    if (needle.size() > haystack.size())
        return false;
    auto const tail = haystack.substr(haystack.size() - needle.size());
    return tail == needle;
}
159
/**
 * Returns whatever `x.data()` yields, i.e. a pointer to the first element
 * for the standard contiguous containers and strings.
 */
template <class T>
auto begin_ptr(T& x)
{
    auto first = x.data();
    return first;
}
/**
 * Returns `x.data()` advanced by `x.size()`, i.e. the one-past-the-end
 * pointer for the standard contiguous containers and strings.
 */
template <class T>
auto end_ptr(T& x)
{
    auto first = x.data();
    return first + x.size();
}
170NUSPELL_END_INLINE_NAMESPACE
171} // namespace nuspell
172#endif // NUSPELL_UTILS_HXX
Library main namespace.
Definition aff_data.cxx:33