Nuspell
spellchecker
string_utils.hxx
Go to the documentation of this file.
1 /* Copyright 2018 Dimitrij Mijoski, Sander van Geloven
2  * Copyright 2016-2017 Dimitrij Mijoski
3  *
4  * This file is part of Nuspell.
5  *
6  * Nuspell is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Nuspell is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
25 #ifndef NUSPELL_STRING_UTILS_HXX
26 #define NUSPELL_STRING_UTILS_HXX
27 
#include <algorithm>
#include <functional>
#include <iterator>
#include <locale>
#include <stack>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
34 
35 #ifdef __has_include
36 #if __has_include(<experimental/string_view>)
37 #if !defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000
38 #include <experimental/string_view>
39 #if defined(__cpp_lib_experimental_string_view) || defined(_LIBCPP_VERSION)
40 #define NUSPELL_STR_VIEW_NS std::experimental
41 #endif
42 #endif
43 #endif
44 #endif
45 
46 #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 7000
47 #include <string_view>
48 #define NUSPELL_STR_VIEW_NS std
49 #endif
50 
51 #ifndef NUSPELL_STR_VIEW_NS
52 #define NUSPELL_STR_VIEW_NS boost
53 #include <boost/functional/hash.hpp>
54 #include <boost/utility/string_view.hpp>
55 template <class CharT>
56 struct std::hash<boost::basic_string_view<CharT>> {
57  auto operator()(boost::basic_string_view<CharT> s) const
58  {
59  return boost::hash_range(begin(s), end(s));
60  }
61 };
62 #endif
63 
64 namespace nuspell {
/**
 * Expands to the literal x spelled for character type T.
 *
 * The macro passes both the narrow form (x) and the wide form (L##x) to
 * literal_choose<T>, which hands back the one matching T.
 */
#define NUSPELL_LITERAL(T, x) ::nuspell::literal_choose<T>(x, L##x)

/**
 * Selects one of two string literals depending on CharT.
 *
 * Only the specializations for char and wchar_t below are defined.
 *
 * @param narrow literal made of char.
 * @param wide the same literal made of wchar_t.
 * @return narrow when CharT is char, wide when CharT is wchar_t.
 */
template <class CharT>
constexpr auto literal_choose(const char* narrow, const wchar_t* wide);

/** Specialization for char: yields the narrow literal. */
template <>
constexpr auto literal_choose<char>(const char* narrow,
                                    const wchar_t* /*wide*/)
{
	return narrow;
}

/** Specialization for wchar_t: yields the wide literal. */
template <>
constexpr auto literal_choose<wchar_t>(const char* /*narrow*/,
                                       const wchar_t* wide)
{
	return wide;
}
79 
// Alias template for the basic_string_view class template that lives in the
// namespace named by the NUSPELL_STR_VIEW_NS macro.
80 template <class CharT>
81 using my_string_view = NUSPELL_STR_VIEW_NS::basic_string_view<CharT>;
// String view over char elements.
82 using string_view = my_string_view<char>;
// String view over wchar_t elements.
83 using wstring_view = my_string_view<wchar_t>;
84 
/**
 * Splits a string at every position reported by s.find_first_of(sep, ...).
 *
 * Each piece between two separators is written to out as a substring of s.
 * Leading, trailing and adjacent separators produce empty pieces, and at
 * least one piece is always written (an empty input yields one empty string).
 *
 * @param s string to split.
 * @param sep separator(s), anything accepted by basic_string::find_first_of.
 * @param out output iterator that receives the pieces.
 * @return the output iterator advanced past the last written piece.
 */
template <class CharT, class SepT, class OutIt>
auto split_on_any_of(const std::basic_string<CharT>& s, const SepT& sep,
                     OutIt out)
{
	auto piece_start = typename std::basic_string<CharT>::size_type(0);
	for (;;) {
		const auto sep_pos = s.find_first_of(sep, piece_start);
		// When no separator is left sep_pos is npos and the requested
		// length overshoots the end; substr clamps it, which is defined.
		*out++ = s.substr(piece_start, sep_pos - piece_start);
		if (sep_pos == s.npos)
			break;
		// Skipping exactly one element is valid because every
		// separator is a single character.
		piece_start = sep_pos + 1;
	}
	return out;
}
113 
/**
 * Splits a string on a single separator character.
 *
 * Forwards to split_on_any_of with the lone character as the separator.
 *
 * @param s string to split.
 * @param sep separator character.
 * @param out output iterator that receives the pieces.
 * @return whatever split_on_any_of returns for these arguments.
 */
template <class CharT, class OutIt>
auto split(const std::basic_string<CharT>& s, CharT sep, OutIt out)
{
	return split_on_any_of(s, sep, out);
}
130 
/**
 * Splits a string into its runs of non-whitespace characters.
 *
 * Whitespace is decided by the std::ctype<CharT> facet of loc. Runs of
 * whitespace of any length separate words and never produce empty strings;
 * an empty or all-whitespace input writes nothing.
 *
 * @param s string to split.
 * @param out output iterator that receives each word as a basic_string.
 * @param loc locale supplying the ctype facet.
 * @return the output iterator advanced past the last written word.
 */
template <class CharT, class OutIt>
auto split_on_whitespace(const std::basic_string<CharT>& s, OutIt out,
                         const std::locale& loc = std::locale()) -> OutIt
{
	const auto& ctype_facet = std::use_facet<std::ctype<CharT>>(loc);
	const auto is_space = [&ctype_facet](CharT c) {
		return ctype_facet.is(std::ctype_base::space, c);
	};

	const auto last = s.end();
	auto word_first = std::find_if_not(s.begin(), last, is_space);
	while (word_first != last) {
		const auto word_last = std::find_if(word_first, last, is_space);
		*out++ = std::basic_string<CharT>(word_first, word_last);
		word_first = std::find_if_not(word_last, last, is_space);
	}
	return out;
}
161 
/**
 * Splits a string on whitespace and stores the words in a vector.
 *
 * The vector is emptied first, then filled by split_on_whitespace through a
 * back-insert iterator.
 *
 * @param s string to split.
 * @param v vector that ends up holding only the words of s.
 * @param loc locale passed through to split_on_whitespace.
 */
template <class CharT>
auto split_on_whitespace_v(const std::basic_string<CharT>& s,
                           std::vector<std::basic_string<CharT>>& v,
                           const std::locale& loc = std::locale()) -> void
{
	v.clear();
	auto sink = std::back_inserter(v);
	split_on_whitespace(s, sink, loc);
}
179 
/**
 * Removes from s every character that occurs in erase_chars.
 *
 * The string is modified in place; the relative order of the remaining
 * characters is kept. An empty erase_chars leaves s untouched.
 *
 * @param s string to modify.
 * @param erase_chars set of characters to drop.
 * @return reference to s.
 */
template <class CharT>
auto& erase_chars(std::basic_string<CharT>& s,
                  const std::basic_string<CharT>& erase_chars)
{
	if (!erase_chars.empty()) {
		const auto kept_end = std::remove_if(
		    s.begin(), s.end(), [&erase_chars](CharT c) {
			    return erase_chars.find(c) != erase_chars.npos;
		    });
		s.erase(kept_end, s.end());
	}
	return s;
}
193 
/**
 * Replaces every occurrence of one character with another, in place.
 *
 * @param s string to modify.
 * @param from character to look for.
 * @param to character written over each occurrence of from.
 * @return reference to s.
 */
template <class CharT>
auto& replace_char(std::basic_string<CharT>& s, CharT from, CharT to)
{
	std::replace(s.begin(), s.end(), from, to);
	return s;
}
202 
/**
 * Tests whether a word looks like a number.
 *
 * Accepted shape: an optional leading '-', then one or more groups of the
 * digits '0'..'9', where consecutive groups are separated by exactly one of
 * '.', ',' or '-'. The word must end with a digit.
 *
 * @param s word to test.
 * @return true if s has the shape described above, false otherwise
 *         (including for the empty string).
 */
template <class CharT>
auto is_number(const std::basic_string<CharT>& s) -> bool
{
	const auto is_digit = [](CharT c) { return !(c < '0' || c > '9'); };
	const auto is_separator = [](CharT c) {
		return c == '.' || c == ',' || c == '-';
	};

	const auto last = s.end();
	auto cur = s.begin();
	if (cur == last)
		return false;
	if (*cur == '-')
		++cur; // optional sign

	while (cur != last) {
		const auto digits_end = std::find_if_not(cur, last, is_digit);
		if (digits_end == cur)
			return false; // a digit group must not be empty
		if (digits_end == last)
			return true; // word ends with digits
		if (!is_separator(*digits_end))
			return false;
		cur = std::next(digits_end);
	}
	// Reached only for a lone '-' or a trailing separator.
	return false;
}
235 
/**
 * Matches a whole data sequence against a very small regex dialect.
 *
 * The pattern is a sequence of elements. An element is a single pattern
 * character, optionally followed by a quantifier:
 *   - "c?" matches zero or one data element equal to c,
 *   - "c*" matches zero or more data elements equal to c,
 *   - "c"  matches exactly one data element equal to c.
 * A '?' or '*' is only treated as a quantifier when it directly follows the
 * character it applies to; otherwise it is an ordinary character. The match
 * is anchored at both ends: all of the data has to be consumed.
 *
 * The search backtracks using an explicit stack of
 * (data position, pattern position) states instead of recursion. The longer
 * alternative is pushed last, so it is tried first.
 *
 * Iterators only need to be forward iterators.
 *
 * @param data_first begin of the data sequence.
 * @param data_last end of the data sequence.
 * @param pat_first begin of the pattern.
 * @param pat_last end of the pattern.
 * @param eq binary predicate called as eq(data element, pattern character).
 * @return true if the pattern matches the complete data sequence.
 */
template <class DataIter, class PatternIter, class FuncEq = std::equal_to<>>
auto match_simple_regex(DataIter data_first, DataIter data_last,
                        PatternIter pat_first, PatternIter pat_last,
                        FuncEq eq = FuncEq())
{
	using PatChar =
	    typename std::iterator_traits<PatternIter>::value_type;

	auto pending = std::stack<std::pair<DataIter, PatternIter>>();
	pending.emplace(data_first, pat_first);
	while (!pending.empty()) {
		const auto state = pending.top();
		pending.pop();
		const auto data_it = state.first;
		const auto pat_it = state.second;

		if (pat_it == pat_last) {
			// Pattern used up: success only if the data is used
			// up too. The result is returned right away, other
			// pending states are not examined.
			return static_cast<bool>(data_it == data_last);
		}

		// Look one character ahead for a quantifier; 0 means the
		// current character is the last one in the pattern.
		const auto pat_next = std::next(pat_it);
		auto quantifier = PatChar();
		if (pat_next != pat_last)
			quantifier = *pat_next;

		switch (quantifier) {
		case '?':
			// Zero occurrences: skip the element.
			pending.emplace(data_it, std::next(pat_next));
			// One occurrence: consume data, skip the element.
			if (data_it != data_last && eq(*data_it, *pat_it))
				pending.emplace(std::next(data_it),
				                std::next(pat_next));
			break;
		case '*':
			// Stop repeating: skip the element.
			pending.emplace(data_it, std::next(pat_next));
			// One more occurrence: consume data, stay on the
			// same element.
			if (data_it != data_last && eq(*data_it, *pat_it))
				pending.emplace(std::next(data_it), pat_it);
			break;
		default:
			// Plain character: must match exactly one element.
			if (data_it != data_last && eq(*data_it, *pat_it))
				pending.emplace(std::next(data_it), pat_next);
			break;
		}
	}
	return false;
}
279 
/**
 * Range convenience overload of match_simple_regex.
 *
 * Obtains begin/end of both ranges and forwards them, together with eq, to
 * the iterator-based overload.
 *
 * @param data range holding the data sequence.
 * @param pattern range holding the pattern.
 * @param eq binary predicate forwarded unchanged.
 * @return the result of the iterator-based overload.
 */
template <class DataRange, class PatternRange, class FuncEq = std::equal_to<>>
auto match_simple_regex(const DataRange& data, const PatternRange& pattern,
                        FuncEq eq = FuncEq())
{
	// Makes std::begin/std::end visible to the unqualified calls below.
	using namespace std;
	auto data_first = begin(data);
	auto data_last = end(data);
	auto pat_first = begin(pattern);
	auto pat_last = end(pattern);
	return match_simple_regex(data_first, data_last, pat_first, pat_last,
	                          eq);
}
288 
289 } // namespace nuspell
290 #endif // NUSPELL_STRING_UTILS_HXX
Definition: main.cxx:516
Library main namespace.
Definition: aff_data.cxx:78
auto split_on_whitespace_v(const std::basic_string< CharT > &s, std::vector< std::basic_string< CharT >> &v, const std::locale &loc=std::locale()) -> void
Splits on whitespace, outputs to vector of strings.
Definition: string_utils.hxx:172
auto split(const std::basic_string< CharT > &s, CharT sep, OutIt out)
Splits string on a single-char separator.
Definition: string_utils.hxx:126
auto split_on_whitespace(const std::basic_string< CharT > &s, OutIt out, const std::locale &loc=std::locale()) -> OutIt
Splits on whitespace.
Definition: string_utils.hxx:144
auto split_on_any_of(const std::basic_string< CharT > &s, const SepT &sep, OutIt out)
Splits string on a set of single-char separators.
Definition: string_utils.hxx:97
auto is_number(const std::basic_string< CharT > &s) -> bool
Tests if word is a number.
Definition: string_utils.hxx:211