Nuspell
spellchecker
structures.hxx
Go to the documentation of this file.
1 /* Copyright 2018 Dimitrij Mijoski, Sander van Geloven
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
24 #ifndef NUSPELL_STRUCTURES_HXX
25 #define NUSPELL_STRUCTURES_HXX
26 
27 #include <algorithm>
28 #include <cmath>
29 #include <iterator>
30 #include <stdexcept>
31 #include <string>
32 #include <tuple>
33 #include <utility>
34 #include <vector>
35 
36 #include <boost/container/small_vector.hpp>
37 #include <boost/multi_index/member.hpp>
38 #include <boost/range/iterator_range_core.hpp>
39 
40 #ifdef __has_include
41 #if __has_include(<experimental/string_view>)
42 #if !defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000
43 #include <experimental/string_view>
44 #if defined(__cpp_lib_experimental_string_view) || defined(_LIBCPP_VERSION)
45 #define NUSPELL_STR_VIEW_NS std::experimental
46 #endif
47 #endif
48 #endif
49 #endif
50 
51 #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 7000
52 #include <string_view>
53 #define NUSPELL_STR_VIEW_NS std
54 #endif
55 
56 #ifndef NUSPELL_STR_VIEW_NS
57 #include <boost/utility/string_view.hpp>
58 #endif
59 
60 namespace nuspell {
61 
67 template <class CharT>
68 class String_Set {
69  private:
70  std::basic_string<CharT> d;
71  auto sort_uniq() -> void
72  {
73  auto first = begin();
74  auto last = end();
75  using t = traits_type;
76  sort(first, last, t::lt);
77  d.erase(unique(first, last, t::eq), last);
78  }
79  struct Char_Traits_Less_Than {
80  auto operator()(CharT a, CharT b)
81  {
82  return traits_type::lt(a, b);
83  }
84  };
85 
86  public:
87  using StrT = std::basic_string<CharT>;
88  using traits_type = typename StrT::traits_type;
89 
90  using key_type = typename StrT::value_type;
91  using key_compare = Char_Traits_Less_Than;
92  using value_type = typename StrT::value_type;
93  using value_compare = key_compare;
94  using allocator_type = typename StrT::allocator_type;
95  using pointer = typename StrT::pointer;
96  using const_pointer = typename StrT::const_pointer;
97  using reference = typename StrT::reference;
98  using const_reference = typename StrT::const_reference;
99  using size_type = typename StrT::size_type;
100  using difference_type = typename StrT::difference_type;
101  using iterator = typename StrT::iterator;
102  using const_iterator = typename StrT::const_iterator;
103  using reverse_iterator = typename StrT::reverse_iterator;
104  using const_reverse_iterator = typename StrT::const_reverse_iterator;
105 
106  String_Set() = default;
107  String_Set(const StrT& s) : d(s) { sort_uniq(); }
108  String_Set(StrT&& s) : d(move(s)) { sort_uniq(); }
109  template <class InputIterator>
110  String_Set(InputIterator first, InputIterator last) : d(first, last)
111  {
112  sort_uniq();
113  }
114  String_Set(std::initializer_list<value_type> il) : d(il)
115  {
116  sort_uniq();
117  }
118 
119  auto& operator=(const StrT& s)
120  {
121  d = s;
122  sort_uniq();
123  return *this;
124  }
125  auto& operator=(StrT&& s)
126  {
127  d = move(s);
128  sort_uniq();
129  return *this;
130  }
131  auto& operator=(std::initializer_list<value_type> il)
132  {
133  d = il;
134  sort_uniq();
135  return *this;
136  }
137 
138  // non standard underlying storage access:
139  auto& data() const { return d; }
140  operator const StrT&() const noexcept { return d; }
141 
142  // iterators:
143  iterator begin() noexcept { return d.begin(); }
144  const_iterator begin() const noexcept { return d.begin(); }
145  iterator end() noexcept { return d.end(); }
146  const_iterator end() const noexcept { return d.end(); }
147 
148  reverse_iterator rbegin() noexcept { return d.rbegin(); }
149  const_reverse_iterator rbegin() const noexcept { return d.rbegin(); }
150  reverse_iterator rend() noexcept { return d.rend(); }
151  const_reverse_iterator rend() const noexcept { return d.rend(); }
152 
153  const_iterator cbegin() const noexcept { return d.cbegin(); }
154  const_iterator cend() const noexcept { return d.cend(); }
155  const_reverse_iterator crbegin() const noexcept { return d.crbegin(); }
156  const_reverse_iterator crend() const noexcept { return d.crend(); }
157 
158  // capacity:
159  bool empty() const noexcept { return d.empty(); }
160  size_type size() const noexcept { return d.size(); }
161  size_type max_size() const noexcept { return d.max_size(); }
162 
163  // modifiers:
164  std::pair<iterator, bool> insert(const value_type& x)
165  {
166  auto it = lower_bound(x);
167  if (it != end() && *it == x) {
168  return {it, false};
169  }
170  auto ret = d.insert(it, x);
171  return {ret, true};
172  }
173  // std::pair<iterator, bool> insert(value_type&& x);
174  iterator insert(iterator hint, const value_type& x)
175  {
176  if (hint == end() || traits_type::lt(x, *hint)) {
177  if (hint == begin() ||
178  traits_type::lt(*(hint - 1), x)) {
179  return d.insert(hint, x);
180  }
181  }
182  return insert(x).first;
183  }
184 
185  // iterator insert(const_iterator hint, value_type&& x);
186  template <class InputIterator>
187  void insert(InputIterator first, InputIterator last)
188  {
189  d.insert(end(), first, last);
190  sort_uniq();
191  }
192  void insert(std::initializer_list<value_type> il)
193  {
194  d.insert(end(), il);
195  sort_uniq();
196  }
197  template <class... Args>
198  std::pair<iterator, bool> emplace(Args&&... args)
199  {
200  return insert(CharT(args...));
201  }
202 
203  template <class... Args>
204  iterator emplace_hint(iterator hint, Args&&... args)
205  {
206  return insert(hint, CharT(args...));
207  }
208 
209  iterator erase(iterator position) { return d.erase(position); }
210  size_type erase(const key_type& x)
211  {
212  auto i = d.find(x);
213  if (i != d.npos) {
214  d.erase(i, 1);
215  return true;
216  }
217  return false;
218  }
219  iterator erase(iterator first, iterator last)
220  {
221  return d.erase(first, last);
222  }
223  void swap(String_Set& s) { d.swap(s.d); }
224  void clear() noexcept { d.clear(); }
225 
226  // non standrd modifiers:
227  auto insert(const StrT& s) -> void
228  {
229  d += s;
230  sort_uniq();
231  }
232  auto operator+=(const StrT& s) -> String_Set
233  {
234  insert(s);
235  return *this;
236  }
237 
238  // observers:
239  key_compare key_comp() const { return Char_Traits_Less_Than(); }
240  value_compare value_comp() const { return key_comp(); }
241 
242  // set operations:
243  private:
244  auto lookup(const key_type& x) const
245  {
246  auto i = d.find(x);
247  if (i != d.npos)
248  i = d.size();
249  return i;
250  }
251 
252  public:
253  iterator find(const key_type& x) { return begin() + lookup(x); }
254  const_iterator find(const key_type& x) const
255  {
256  return begin() + lookup(x);
257  }
258  size_type count(const key_type& x) const { return d.find(x) != d.npos; }
259 
260  iterator lower_bound(const key_type& x)
261  {
262  return std::lower_bound(begin(), end(), x, traits_type::lt);
263  }
264 
265  const_iterator lower_bound(const key_type& x) const
266  {
267  return std::lower_bound(begin(), end(), x, traits_type::lt);
268  }
269  iterator upper_bound(const key_type& x)
270  {
271  return std::upper_bound(begin(), end(), x, traits_type::lt);
272  }
273 
274  const_iterator upper_bound(const key_type& x) const
275  {
276  return std::upper_bound(begin(), end(), x, traits_type::lt);
277  }
278  std::pair<iterator, iterator> equal_range(const key_type& x)
279  {
280  return std::equal_range(begin(), end(), x, traits_type::lt);
281  }
282 
283  std::pair<const_iterator, const_iterator>
284  equal_range(const key_type& x) const
285  {
286  return std::equal_range(begin(), end(), x, traits_type::lt);
287  }
288 
289  // non standard set operations:
290  bool contains(const key_type& x) const { return count(x); }
291 
292  // compare
293  bool operator<(const String_Set& rhs) const { return d < rhs.d; }
294  bool operator<=(const String_Set& rhs) const { return d <= rhs.d; }
295  bool operator==(const String_Set& rhs) const { return d == rhs.d; }
296  bool operator!=(const String_Set& rhs) const { return d != rhs.d; }
297  bool operator>=(const String_Set& rhs) const { return d >= rhs.d; }
298  bool operator>(const String_Set& rhs) const { return d > rhs.d; }
299 };
300 // extern template class String_Set<char>;
301 // extern template class String_Set<wchar_t>;
302 extern template class String_Set<char16_t>;
303 
304 template <class CharT>
305 auto swap(String_Set<CharT>& a, String_Set<CharT>& b)
306 {
307  a.swap(b);
308 }
309 
311 
312 template <class CharT>
314  public:
315  using StrT = std::basic_string<CharT>;
316  using Table_Pairs = std::vector<std::pair<StrT, StrT>>;
317 
318  private:
319  Table_Pairs table;
320  void sort_uniq(); // implemented in cxx
321 
322  public:
323  Substr_Replacer() = default;
324  Substr_Replacer(const Table_Pairs& v) : table(v) { sort_uniq(); }
325  Substr_Replacer(const Table_Pairs&& v) : table(move(v)) { sort_uniq(); }
326 
327  auto& operator=(const Table_Pairs& v)
328  {
329  table = v;
330  sort_uniq();
331  return *this;
332  }
333  auto& operator=(const Table_Pairs&& v)
334  {
335  table = move(v);
336  sort_uniq();
337  return *this;
338  }
339 
340  template <class Range>
341  auto& operator=(const Range& range)
342  {
343  table.assign(std::begin(range), std::end(range));
344  sort_uniq();
345  return *this;
346  }
347 
348  auto replace(StrT& s) const -> StrT&; // implemented in cxx
349  auto replace_copy(StrT s) const -> StrT
350  {
351  replace(s);
352  return s;
353  }
354 };
355 extern template class Substr_Replacer<char>;
356 extern template class Substr_Replacer<wchar_t>;
357 
358 template <class CharT>
359 class Break_Table {
360  public:
361  using StrT = std::basic_string<CharT>;
362  using Table_Str = std::vector<StrT>;
363  using iterator = typename Table_Str::iterator;
364  using const_iterator = typename Table_Str::const_iterator;
365 
366  private:
367  Table_Str table;
368  size_t start_word_breaks_last_idx = 0;
369  size_t end_word_breaks_last_idx = 0;
370 
371  auto order_entries() -> void; // implemented in cxx
372 
373  public:
374  Break_Table() = default;
375  Break_Table(const Table_Str& v) : table(v) { order_entries(); }
376  Break_Table(Table_Str&& v) : table(move(v)) { order_entries(); }
377 
378  auto& operator=(const Table_Str& v)
379  {
380  table = v;
381  order_entries();
382  return *this;
383  }
384 
385  auto& operator=(Table_Str&& v)
386  {
387  table = move(v);
388  order_entries();
389  return *this;
390  }
391 
392  template <class Range>
393  auto& operator=(const Range& range)
394  {
395  table.assign(std::begin(range), std::end(range));
396  order_entries();
397  return *this;
398  }
399 
400  auto start_word_breaks() const -> boost::iterator_range<const_iterator>
401  {
402  return {begin(table),
403  begin(table) + start_word_breaks_last_idx};
404  }
405  auto end_word_breaks() const -> boost::iterator_range<const_iterator>
406  {
407  return {begin(table) + start_word_breaks_last_idx,
408  begin(table) + end_word_breaks_last_idx};
409  }
410  auto middle_word_breaks() const -> boost::iterator_range<const_iterator>
411  {
412  return {begin(table) + end_word_breaks_last_idx, end(table)};
413  }
414 };
415 extern template class Break_Table<char>;
416 extern template class Break_Table<wchar_t>;
417 
418 struct identity {
419  template <class T>
420  constexpr auto operator()(T&& t) const noexcept -> T&&
421  {
422  return std::forward<T>(t);
423  }
424 };
425 
426 template <class Value, class Key = Value, class KeyExtract = identity,
427  class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>>
429  private:
430  using bucket_type = boost::container::small_vector<Value, 1>;
431  static constexpr float max_load_fact = 7.0 / 8.0;
432  std::vector<bucket_type> data;
433  size_t sz = 0;
434  size_t max_load_factor_capacity = 0;
435  KeyExtract key_extract = {};
436  Hash hash = {};
437  KeyEqual equal = {};
438 
439  public:
440  using key_type = Key;
441  using value_type = Value;
442  using size_type = std::size_t;
443  using difference_type = std::ptrdiff_t;
444  using hasher = Hash;
445  using reference = value_type&;
446  using const_reference = const value_type&;
447  using pointer = typename bucket_type::pointer;
448  using const_pointer = typename bucket_type::const_pointer;
449  using local_iterator = typename bucket_type::iterator;
450  using local_const_iterator = typename bucket_type::const_iterator;
451 
452  Hash_Multiset() : data(16) {}
453 
454  auto size() const { return sz; }
455  auto empty() const { return size() == 0; }
456 
457  auto rehash(size_t count)
458  {
459  if (empty()) {
460  size_t capacity = 16;
461  while (capacity <= count)
462  capacity <<= 1;
463  data.resize(capacity);
464  max_load_factor_capacity =
465  std::ceil(capacity * max_load_fact);
466  return;
467  }
468  if (count < size() / max_load_fact)
469  count = size() / max_load_fact;
470  auto n = Hash_Multiset();
471  n.rehash(count);
472  for (auto& b : data) {
473  for (auto& x : b) {
474  n.insert(x);
475  }
476  }
477  data.swap(n.data);
478  sz = n.sz;
479  max_load_factor_capacity = n.max_load_factor_capacity;
480  }
481 
482  auto reserve(size_t count) -> void
483  {
484  rehash(std::ceil(count / max_load_fact));
485  }
486 
487  auto insert(const_reference value)
488  {
489  using namespace std;
490  if (sz == max_load_factor_capacity) {
491  reserve(sz + 1);
492  }
493  auto&& key = key_extract(value);
494  auto h = hash(key);
495  auto h_mod = h & (data.size() - 1);
496  auto& bucket = data[h_mod];
497  if (bucket.size() == 0 || bucket.size() == 1 ||
498  equal(key, key_extract(bucket.back()))) {
499  bucket.push_back(value);
500  ++sz;
501  return end(bucket) - 1;
502  }
503  auto last =
504  std::find_if(rbegin(bucket), rend(bucket), [&](auto& x) {
505  return equal(key, key_extract(x));
506  });
507  if (last != rend(bucket)) {
508  auto ret = bucket.insert(last.base(), value);
509  ++sz;
510  return ret;
511  }
512 
513  bucket.push_back(value);
514  ++sz;
515  return end(bucket) - 1;
516  }
517  template <class... Args>
518  auto emplace(Args&&... a)
519  {
520  return insert(value_type(std::forward<Args>(a)...));
521  }
522 
523  // Note, leaks non-const iterator. do not modify
524  // the key part of the returned value(s).
525  template <class CompatibleKey>
526  auto equal_range_nonconst_unsafe(const CompatibleKey& key)
527  -> std::pair<local_iterator, local_iterator>
528  {
529  using namespace std;
530  if (data.empty())
531  return {};
532  auto h = hash(key);
533  auto h_mod = h & (data.size() - 1);
534  auto& bucket = data[h_mod];
535  if (bucket.empty())
536  return {};
537  if (bucket.size() == 1) {
538  if (equal(key, key_extract(bucket.front())))
539  return {begin(bucket), end(bucket)};
540  return {};
541  }
542  auto first =
543  std::find_if(begin(bucket), end(bucket), [&](auto& x) {
544  return equal(key, key_extract(x));
545  });
546  if (first == end(bucket))
547  return {};
548  auto next = first + 1;
549  if (next == end(bucket) || !equal(key, key_extract(*next)))
550  return {first, next};
551  auto last =
552  std::find_if(rbegin(bucket), rend(bucket), [&](auto& x) {
553  return equal(key, key_extract(x));
554  });
555  return {first, last.base()};
556  }
557 
558  template <class CompatibleKey>
559  auto equal_range(const CompatibleKey& key) const
560  -> std::pair<local_const_iterator, local_const_iterator>
561  {
562  using namespace std;
563  if (data.empty())
564  return {};
565  auto h = hash(key);
566  auto h_mod = h & (data.size() - 1);
567  auto& bucket = data[h_mod];
568  if (bucket.empty())
569  return {};
570  if (bucket.size() == 1) {
571  if (equal(key, key_extract(bucket.front())))
572  return {begin(bucket), end(bucket)};
573  return {};
574  }
575  auto first =
576  std::find_if(begin(bucket), end(bucket), [&](auto& x) {
577  return equal(key, key_extract(x));
578  });
579  if (first == end(bucket))
580  return {};
581  auto next = first + 1;
582  if (next == end(bucket) || !equal(key, key_extract(*next)))
583  return {first, next};
584  auto last =
585  std::find_if(rbegin(bucket), rend(bucket), [&](auto& x) {
586  return equal(key, key_extract(x));
587  });
588  return {first, last.base()};
589  }
590 };
591 
598 template <class CharT>
599 class Condition {
600  public:
601  enum Span_Type {
602  NORMAL ,
603  DOT ,
604  ANY_OF ,
605  NONE_OF
606  };
607  using StrT = std::basic_string<CharT>;
608  template <class T, class U, class V>
609  using tuple = std::tuple<T, U, V>;
610  template <class T>
611  using vector = std::vector<T>;
612 
613  private:
614  StrT cond;
615  vector<tuple<size_t, size_t, Span_Type>> spans; // pos, len, type
616  size_t length = 0;
617 
618  auto construct() -> void; // implemented in cxx
619 
620  public:
621  Condition() = default;
622  Condition(const StrT& condition) : cond(condition) { construct(); }
623  Condition(StrT&& condition) : cond(move(condition)) { construct(); }
624  auto match(const StrT& s, size_t pos = 0, size_t len = StrT::npos) const
625  -> bool; // implemented in cxx
626  auto match_prefix(const StrT& s) const { return match(s, 0, length); }
627  auto match_suffix(const StrT& s) const
628  {
629  if (length > s.size())
630  return false;
631  return match(s, s.size() - length, length);
632  }
633 };
634 
635 template <class CharT>
636 class Prefix {
637  public:
638  using StrT = std::basic_string<CharT>;
639  using CondT = Condition<CharT>;
640 
641  char16_t flag = 0;
642  bool cross_product = false;
643  StrT stripping;
644  StrT appending;
645  Flag_Set cont_flags;
646  CondT condition;
647 
648  Prefix() = default;
649  Prefix(char16_t flag, bool cross_product, const StrT& strip,
650  const StrT& append, const Flag_Set& cont_flags,
651  const StrT& condition)
652  : flag(flag), cross_product(cross_product), stripping(strip),
653  appending(append), cont_flags(cont_flags), condition(condition)
654  {
655  }
656 
657  auto to_root(StrT& word) const -> StrT&
658  {
659  return word.replace(0, appending.size(), stripping);
660  }
661  auto to_root_copy(StrT word) const -> StrT
662  {
663  to_root(word);
664  return word;
665  }
666 
667  auto to_derived(StrT& word) const -> StrT&
668  {
669  return word.replace(0, stripping.size(), appending);
670  }
671  auto to_derived_copy(StrT word) const -> StrT
672  {
673  to_derived(word);
674  return word;
675  }
676 
677  auto check_condition(const StrT& word) const -> bool
678  {
679  return condition.match_prefix(word);
680  }
681 };
682 
683 template <class CharT>
684 class Suffix {
685  public:
686  using StrT = std::basic_string<CharT>;
687  using CondT = Condition<CharT>;
688 
689  char16_t flag = 0;
690  bool cross_product = false;
691  StrT stripping;
692  StrT appending;
693  Flag_Set cont_flags;
694  CondT condition;
695 
696  Suffix() = default;
697  Suffix(char16_t flag, bool cross_product, const StrT& strip,
698  const StrT& append, const Flag_Set& cont_flags,
699  const StrT& condition)
700  : flag(flag), cross_product(cross_product), stripping(strip),
701  appending(append), cont_flags(cont_flags), condition(condition)
702  {
703  }
704 
705  auto to_root(StrT& word) const -> StrT&
706  {
707  return word.replace(word.size() - appending.size(),
708  appending.size(), stripping);
709  }
710  auto to_root_copy(StrT word) const -> StrT
711  {
712  to_root(word);
713  return word;
714  }
715 
716  auto to_derived(StrT& word) const -> StrT&
717  {
718  return word.replace(word.size() - stripping.size(),
719  stripping.size(), appending);
720  }
721  auto to_derived_copy(StrT word) const -> StrT
722  {
723  to_derived(word);
724  return word;
725  }
726 
727  auto check_condition(const StrT& word) const -> bool
728  {
729  return condition.match_suffix(word);
730  }
731 };
732 extern template class Prefix<char>;
733 extern template class Prefix<wchar_t>;
734 extern template class Suffix<char>;
735 extern template class Suffix<wchar_t>;
736 
737 using boost::multi_index::member;
738 
739 #ifdef NUSPELL_STR_VIEW_NS
740 template <class CharT>
741 using my_string_view = NUSPELL_STR_VIEW_NS::basic_string_view<CharT>;
742 template <class CharT>
743 using sv_hash = std::hash<my_string_view<CharT>>;
744 #else
745 template <class CharT>
746 using my_string_view = boost::basic_string_view<CharT>;
747 template <class CharT>
748 struct sv_hash {
749  auto operator()(boost::basic_string_view<CharT> s) const
750  {
751  return boost::hash_range(begin(s), end(s));
752  }
753 };
754 #endif
755 
756 template <class CharT>
757 struct sv_eq {
758  auto operator()(my_string_view<CharT> l, my_string_view<CharT> r) const
759  {
760  return l == r;
761  }
762 };
763 
764 template <class CharT, class AffixT>
765 using Affix_Table_Base =
767  member<AffixT, std::basic_string<CharT>, &AffixT::appending>,
769 
770 template <class CharT, class AffixT>
771 class Affix_Table : private Affix_Table_Base<CharT, AffixT> {
772  private:
773  Flag_Set all_cont_flags;
774 
775  public:
777  using iterator = typename base::local_const_iterator;
778  template <class... Args>
779  auto emplace(Args&&... a)
780  {
781  auto it = base::emplace(std::forward<Args>(a)...);
782  all_cont_flags += it->cont_flags;
783  return it;
784  }
785  auto equal_range(my_string_view<CharT> appending) const
786  {
787  return base::equal_range(appending);
788  }
789  auto has_continuation_flags() const
790  {
791  return all_cont_flags.size() != 0;
792  }
793  auto has_continuation_flag(char16_t flag) const
794  {
795  return all_cont_flags.contains(flag);
796  }
797 };
798 
799 template <class CharT>
801 
802 template <class CharT>
804 
805 template <class CharT>
806 class String_Pair {
807  using StrT = std::basic_string<CharT>;
808  size_t i = 0;
809  StrT s;
810 
811  public:
812  String_Pair() = default;
813  template <class Str1>
824  String_Pair(Str1&& str, size_t i) : i(i), s(std::forward<Str1>(str))
825  {
826  if (i > s.size()) {
827  throw std::out_of_range("word split is too long");
828  }
829  }
830 
831  template <
832  class Str1, class Str2,
833  class = std::enable_if_t<
834  std::is_same<std::remove_reference_t<Str1>, StrT>::value &&
835  std::is_same<std::remove_reference_t<Str2>, StrT>::value>>
836  String_Pair(Str1&& first, Str2&& second)
837  : i(first.size()) /* must be init before s, before we move
838  from first */
839  ,
840  s(std::forward<Str1>(first) + std::forward<Str2>(second))
841 
842  {
843  }
844  auto first() const { return my_string_view<CharT>(s).substr(0, i); }
845  auto second() const { return my_string_view<CharT>(s).substr(i); }
846  auto first(my_string_view<CharT> x)
847  {
848  s.replace(0, i, x.data(), x.size());
849  i = x.size();
850  }
851  auto second(my_string_view<CharT> x)
852  {
853  s.replace(i, s.npos, x.data(), x.size());
854  }
855  auto& str() const { return s; }
856  auto idx() const { return i; }
857 };
858 template <class CharT>
860  using StrT = std::basic_string<CharT>;
861 
862  String_Pair<CharT> begin_end_chars;
863  StrT replacement;
864  char16_t first_word_flag = 0;
865  char16_t second_word_flag = 0;
866  bool match_first_only_unaffixed_or_zero_affixed = false;
867 };
868 
870  std::vector<std::u16string> rules;
871  Flag_Set all_flags;
872 
873  auto fill_all_flags() -> void;
874 
875  public:
876  Compound_Rule_Table() = default;
877  Compound_Rule_Table(const std::vector<std::u16string>& tbl) : rules(tbl)
878  {
879  fill_all_flags();
880  }
881  Compound_Rule_Table(std::vector<std::u16string>&& tbl)
882  : rules(move(tbl))
883  {
884  fill_all_flags();
885  }
886  auto operator=(const std::vector<std::u16string>& tbl)
887  {
888  rules = tbl;
889  fill_all_flags();
890  return *this;
891  }
892  auto operator=(std::vector<std::u16string>&& tbl)
893  {
894  rules = move(tbl);
895  fill_all_flags();
896  return *this;
897  }
898  auto empty() const { return rules.empty(); }
899  auto has_any_of_flags(const Flag_Set& f) const -> bool;
900  auto match_any_rule(const std::vector<const Flag_Set*> data) const
901  -> bool;
902 };
903 
907 template <class CharT>
909  using VecT = std::vector<std::basic_string<CharT>>;
910  VecT d;
911  size_t sz = 0;
912 
913  public:
914  using value_type = typename VecT::value_type;
915  using allocator_type = typename VecT::allocator_type;
916  using size_type = typename VecT::size_type;
917  using difference_type = typename VecT::difference_type;
918  using reference = typename VecT::reference;
919  using const_reference = typename VecT::const_reference;
920  using pointer = typename VecT::pointer;
921  using const_pointer = typename VecT::const_pointer;
922  using iterator = typename VecT::iterator;
923  using const_iterator = typename VecT::const_iterator;
924  using reverse_iterator = typename VecT::reverse_iterator;
925  using const_reverse_iterator = typename VecT::const_reverse_iterator;
926 
927  List_Basic_Strings() = default;
928  explicit List_Basic_Strings(size_type n) : d(n), sz(n) {}
929  List_Basic_Strings(size_type n, const_reference value)
930  : d(n, value), sz(n)
931  {
932  }
933  template <class InputIterator>
934  List_Basic_Strings(InputIterator first, InputIterator last)
935  : d(first, last), sz(d.size())
936  {
937  }
938  List_Basic_Strings(std::initializer_list<value_type> il)
939  : d(il), sz(d.size())
940  {
941  }
942 
943  List_Basic_Strings(const List_Basic_Strings& other) = default;
945  : d(move(other.d)), sz(other.sz)
946  {
947  other.sz = 0;
948  }
949  auto& operator=(const List_Basic_Strings& other)
950  {
951  clear();
952  insert(begin(), other.begin(), other.end());
953  return *this;
954  }
955  auto& operator=(List_Basic_Strings&& other)
956  {
957  d = move(other.d);
958  sz = other.sz;
959  other.sz = 0;
960  return *this;
961  }
962  auto& operator=(std::initializer_list<value_type> il)
963  {
964  clear();
965  insert(begin(), il);
966  return *this;
967  }
968  template <class InputIterator>
969  auto assign(InputIterator first, InputIterator last)
970  {
971  clear();
972  insert(begin(), first, last);
973  }
974  void assign(size_type n, const_reference value)
975  {
976  clear();
977  insert(begin(), n, value);
978  }
979  void assign(std::initializer_list<value_type> il) { *this = il; }
980  auto get_allocator() const noexcept { return d.get_allocator(); }
981 
982  // iterators
983  auto begin() noexcept -> iterator { return d.begin(); }
984  auto begin() const noexcept -> const_iterator { return d.begin(); }
985  auto end() noexcept -> iterator { return begin() + sz; }
986  auto end() const noexcept -> const_iterator { return begin() + sz; }
987 
988  auto rbegin() noexcept { return d.rend() - sz; }
989  auto rbegin() const noexcept { return d.rend() - sz; }
990  auto rend() noexcept { return d.rend(); }
991  auto rend() const noexcept { return d.rend(); }
992 
993  auto cbegin() const noexcept { return d.cbegin(); }
994  auto cend() const noexcept { return cbegin() + sz; }
995 
996  auto crbegin() const noexcept { return d.crend() - sz; }
997  auto crend() const noexcept { return d.crend(); }
998 
999  // [vector.capacity], capacity
1000  auto empty() const noexcept { return sz == 0; }
1001  auto size() const noexcept { return sz; }
1002  auto max_size() const noexcept { return d.max_size(); }
1003  auto capacity() const noexcept { return d.size(); }
1004  auto resize(size_type new_sz)
1005  {
1006  if (new_sz <= sz) {
1007  }
1008  else if (new_sz <= d.size()) {
1009  std::for_each(begin() + sz, begin() + new_sz,
1010  [](auto& s) { s.clear(); });
1011  }
1012  else {
1013  std::for_each(d.begin() + sz, d.end(),
1014  [](auto& s) { s.clear(); });
1015  d.resize(new_sz);
1016  }
1017  sz = new_sz;
1018  }
1019  auto resize(size_type new_sz, const_reference c)
1020  {
1021  if (new_sz <= sz) {
1022  }
1023  else if (new_sz <= d.size()) {
1024  std::for_each(begin() + sz, begin() + new_sz,
1025  [&](auto& s) { s = c; });
1026  }
1027  else {
1028  std::for_each(d.begin() + sz, d.end(),
1029  [&](auto& s) { s = c; });
1030  d.resize(new_sz, c);
1031  }
1032  sz = new_sz;
1033  }
1034  void reserve(size_type n)
1035  {
1036  if (n > d.size())
1037  d.resize(n);
1038  }
1039  void shrink_to_fit()
1040  {
1041  d.resize(sz);
1042  d.shrink_to_fit();
1043  for (auto& s : d) {
1044  s.shrink_to_fit();
1045  }
1046  }
1047 
1048  // element access
1049  auto& operator[](size_type n) { return d[n]; }
1050  auto& operator[](size_type n) const { return d[n]; }
1051  auto& at(size_type n)
1052  {
1053  if (n < sz)
1054  return d[n];
1055  else
1056  throw std::out_of_range("index is out of range");
1057  }
1058  auto& at(size_type n) const
1059  {
1060  if (n < sz)
1061  return d[n];
1062  else
1063  throw std::out_of_range("index is out of range");
1064  }
1065  auto& front() { return d.front(); }
1066  auto& front() const { return d.front(); }
1067  auto& back() { return d[sz - 1]; }
1068  auto& back() const { return d[sz - 1]; }
1069 
1070  // [vector.data], data access
1071  auto data() noexcept { return d.data(); }
1072  auto data() const noexcept { return d.data(); }
1073 
1074  // [vector.modifiers], modifiers
1075  template <class... Args>
1076  auto& emplace_back(Args&&... args)
1077  {
1078  if (sz != d.size())
1079  d[sz] = value_type(std::forward<Args>(args)...);
1080  else
1081  d.emplace_back(std::forward<Args>(args)...);
1082  return d[sz++];
1083  }
1084  auto& emplace_back()
1085  {
1086  if (sz != d.size())
1087  d[sz].clear();
1088  else
1089  d.emplace_back();
1090  return d[sz++];
1091  }
1092  auto push_back(const_reference x)
1093  {
1094  if (sz != d.size())
1095  d[sz] = x;
1096  else
1097  d.push_back(x);
1098  ++sz;
1099  }
1100  auto push_back(value_type&& x)
1101  {
1102  if (sz != d.size())
1103  d[sz] = std::move(x);
1104  else
1105  d.push_back(std::move(x));
1106  ++sz;
1107  }
1108  auto pop_back() { --sz; }
1109 
1110  private:
1111  template <class U>
1112  auto insert_priv(const_iterator pos, U&& val)
1113  {
1114  if (sz != d.size()) {
1115  d[sz] = std::forward<U>(val);
1116  }
1117  else {
1118  auto pos_idx = pos - cbegin();
1119  d.push_back(std::forward<U>(val));
1120  pos = cbegin() + pos_idx;
1121  }
1122  auto p = begin() + (pos - cbegin());
1123  std::rotate(p, begin() + sz, begin() + sz + 1);
1124  ++sz;
1125  return p;
1126  }
1127 
1128  public:
1129  template <class... Args>
1130  auto emplace(const_iterator pos, Args&&... args)
1131  {
1132  if (sz != d.size()) {
1133  d[sz] = value_type(std::forward<Args>(args)...);
1134  }
1135  else {
1136  auto pos_idx = pos - cbegin();
1137  d.emplace(std::forward<Args>(args)...);
1138  pos = cbegin() + pos_idx;
1139  }
1140  auto p = begin() + (pos - cbegin());
1141  std::rotate(p, begin() + sz, begin() + sz + 1);
1142  ++sz;
1143  return p;
1144  }
1145  auto insert(const_iterator pos, const_reference x)
1146  {
1147  return insert_priv(pos, x);
1148  }
1149  auto insert(const_iterator pos, value_type&& x)
1150  {
1151  return insert_priv(pos, std::move(x));
1152  }
1153  auto insert(const_iterator pos, size_type n, const_reference x)
1154  -> iterator
1155  {
1156  auto i = sz;
1157  while (n != 0 && i != d.size()) {
1158  d[i] = x;
1159  --n;
1160  ++i;
1161  }
1162  if (n != 0) {
1163  auto pos_idx = pos - cbegin();
1164  d.insert(d.end(), n, x);
1165  pos = cbegin() + pos_idx;
1166  i = d.size();
1167  }
1168  auto p = begin() + (pos - cbegin());
1169  std::rotate(p, begin() + sz, begin() + i);
1170  sz = i;
1171  return p;
1172  }
1173 
1174  template <class InputIterator>
1175  auto insert(const_iterator pos, InputIterator first, InputIterator last)
1176  -> iterator
1177  {
1178  auto i = sz;
1179  while (first != last && i != d.size()) {
1180  d[i] = *first;
1181  ++first;
1182  ++i;
1183  }
1184  if (first != last) {
1185  auto pos_idx = pos - cbegin();
1186  d.insert(d.end(), first, last);
1187  pos = cbegin() + pos_idx;
1188  i = d.size();
1189  }
1190  auto p = begin() + (pos - cbegin());
1191  std::rotate(p, begin() + sz, begin() + i);
1192  sz = i;
1193  return p;
1194  }
1195  auto insert(const_iterator pos, std::initializer_list<value_type> il)
1196  -> iterator
1197  {
1198  return insert(pos, il.begin(), il.end());
1199  }
1200 
1201  auto erase(const_iterator position)
1202  {
1203  auto i0 = begin();
1204  auto i1 = i0 + (position - cbegin());
1205  auto i2 = i1 + 1;
1206  auto i3 = end();
1207  std::rotate(i1, i2, i3);
1208  --sz;
1209  return i1;
1210  }
1211  auto erase(const_iterator first, const_iterator last)
1212  {
1213  auto i0 = begin();
1214  auto i1 = i0 + (first - cbegin());
1215  auto i2 = i0 + (last - cbegin());
1216  auto i3 = end();
1217  std::rotate(i1, i2, i3);
1218  sz -= last - first;
1219  return i1;
1220  }
1221  auto swap(List_Basic_Strings& other)
1222  {
1223  d.swap(other.d);
1224  std::swap(sz, other.sz);
1225  }
1226  auto clear() noexcept -> void { sz = 0; }
1227 
1228  auto operator==(const List_Basic_Strings& other) const
1229  {
1230  return std::equal(begin(), end(), other.begin(), other.end());
1231  }
1232  auto operator!=(const List_Basic_Strings& other) const
1233  {
1234  return !(*this == other);
1235  }
1236  auto operator<(const List_Basic_Strings& other) const
1237  {
1238  return std::lexicographical_compare(begin(), end(),
1239  other.begin(), other.end());
1240  }
1241  auto operator>=(const List_Basic_Strings& other) const
1242  {
1243  return !(*this < other);
1244  }
1245  auto operator>(const List_Basic_Strings& other) const
1246  {
1247  return std::lexicographical_compare(other.begin(), other.end(),
1248  begin(), end());
1249  }
1250  auto operator<=(const List_Basic_Strings& other) const
1251  {
1252  return !(*this > other);
1253  }
1254 };
1255 
1256 template <class CharT>
1258 {
1259  a.swap(b);
1260 }
1261 
1262 inline namespace v2 {
1263 using List_Strings = List_Basic_Strings<char>;
1264 }
1266 
1267 template <class CharT>
1269  public:
1270  using StrT = std::basic_string<CharT>;
1271  using Table_Str = std::vector<std::pair<StrT, StrT>>;
1272  using iterator = typename Table_Str::iterator;
1273  using const_iterator = typename Table_Str::const_iterator;
1274 
1275  private:
1276  Table_Str table;
1277  size_t whole_word_reps_last_idx = 0;
1278  size_t start_word_reps_last_idx = 0;
1279  size_t end_word_reps_last_idx = 0;
1280 
1281  auto order_entries() -> void; // implemented in cxx
1282 
1283  public:
1284  Replacement_Table() = default;
1285  Replacement_Table(const Table_Str& v) : table(v) { order_entries(); }
1286  Replacement_Table(Table_Str&& v) : table(move(v)) { order_entries(); }
1287 
1288  auto& operator=(const Table_Str& v)
1289  {
1290  table = v;
1291  order_entries();
1292  return *this;
1293  }
1294 
1295  auto& operator=(Table_Str&& v)
1296  {
1297  table = move(v);
1298  order_entries();
1299  return *this;
1300  }
1301 
1302  template <class Range>
1303  auto& operator=(const Range& range)
1304  {
1305  table.assign(std::begin(range), std::end(range));
1306  order_entries();
1307  return *this;
1308  }
1309 
1310  auto whole_word_replacements() const
1311  -> boost::iterator_range<const_iterator>
1312  {
1313  return {begin(table), begin(table) + whole_word_reps_last_idx};
1314  }
1315  auto start_word_replacements() const
1316  -> boost::iterator_range<const_iterator>
1317  {
1318  return {begin(table) + whole_word_reps_last_idx,
1319  begin(table) + start_word_reps_last_idx};
1320  }
1321  auto end_word_replacements() const
1322  -> boost::iterator_range<const_iterator>
1323  {
1324  return {begin(table) + start_word_reps_last_idx,
1325  begin(table) + end_word_reps_last_idx};
1326  }
1327  auto any_place_replacements() const
1328  -> boost::iterator_range<const_iterator>
1329  {
1330  return {begin(table) + end_word_reps_last_idx, end(table)};
1331  }
1332 };
1333 extern template class Break_Table<char>;
1334 extern template class Break_Table<wchar_t>;
1335 
1336 template <class CharT>
1338  using StrT = std::basic_string<CharT>;
1339  StrT chars;
1340  std::vector<StrT> strings;
1341 
1342  auto parse(const StrT& s) -> void;
1343  Similarity_Group() = default;
1344  Similarity_Group(const StrT& s) { parse(s); }
1345  auto& operator=(const StrT& s)
1346  {
1347  parse(s);
1348  return *this;
1349  }
1350 };
1351 
1352 template <class CharT>
1354  using StrT = std::basic_string<CharT>;
1355 
1356  struct Phonet_Match_Result {
1357  size_t count_matched = 0;
1358  size_t go_back_before_replace = 0;
1359  size_t priority = 5;
1360  bool go_back_after_replace = false;
1361  bool treat_next_as_begin = false;
1362  operator bool() { return count_matched; }
1363  };
1364 
1365  std::vector<std::pair<StrT, StrT>> table;
1366  auto order() -> void;
1367  auto static match(const StrT& data, size_t i, const StrT& pattern,
1368  bool at_begin) -> Phonet_Match_Result;
1369 
1370  public:
1371  Phonetic_Table() = default;
1372  Phonetic_Table(const std::vector<std::pair<StrT, StrT>>& v) : table(v)
1373  {
1374  order();
1375  }
1376  Phonetic_Table(std::vector<std::pair<StrT, StrT>>&& v) : table(move(v))
1377  {
1378  order();
1379  }
1380  auto& operator=(const std::vector<std::pair<StrT, StrT>>& v)
1381  {
1382  table = v;
1383  order();
1384  return *this;
1385  }
1386  auto& operator=(std::vector<std::pair<StrT, StrT>>&& v)
1387  {
1388  table = move(v);
1389  order();
1390  return *this;
1391  }
1392  template <class Range>
1393  auto& operator=(const Range& range)
1394  {
1395  table.assign(std::begin(range), std::end(range));
1396  order();
1397  return *this;
1398  }
1399  auto replace(StrT& word) const -> bool;
1400 };
1401 
1402 } // namespace nuspell
1403 #endif // NUSPELL_STRUCTURES_HXX
Definition: structures.hxx:313
Definition: structures.hxx:869
Definition: structures.hxx:1353
Definition: structures.hxx:418
Definition: structures.hxx:1268
Vector of strings that recycles erased strings.
Definition: structures.hxx:908
Definition: structures.hxx:428
Definition: main.cxx:622
Definition: structures.hxx:636
Definition: structures.hxx:757
String_Pair(Str1 &&str, size_t i)
Construct string pair.
Definition: structures.hxx:824
Library main namespace.
Definition: aff_data.cxx:74
Definition: structures.hxx:359
Definition: structures.hxx:859
A Set class backed by a string.
Definition: structures.hxx:68
Span_Type
Definition: structures.hxx:601
Definition: structures.hxx:1337
Definition: structures.hxx:748
Definition: structures.hxx:684
Definition: structures.hxx:771
Definition: structures.hxx:806
Limited regular expression matching used in affix entries.
Definition: structures.hxx:599