19#ifndef NUSPELL_STRUCTURES_HXX
20#define NUSPELL_STRUCTURES_HXX
26#include <forward_list>
37NUSPELL_BEGIN_INLINE_NAMESPACE
42 typename std::iterator_traits<It>::iterator_category;
48 Subrange(std::pair<It, It> p) : a(p.first), b(p.second) {}
49 Subrange(It first, It last) : a(first), b(last) {}
50 template <
class Range>
51 Subrange(Range& r) : a(r.begin()), b(r.end())
54 auto begin() const -> It {
return a; }
55 auto end() const -> It {
return b; }
59Subrange(
const Range& r) -> Subrange<typename Range::const_iterator>;
61Subrange(Range& r) -> Subrange<typename Range::iterator>;
73 std::basic_string<CharT> d;
74 auto sort_uniq() ->
void
78 using t = traits_type;
79 sort(first, last, t::lt);
80 d.erase(unique(first, last, t::eq), last);
82 struct Char_Traits_Less_Than {
83 auto operator()(CharT a, CharT b)
const noexcept
85 return traits_type::lt(a, b);
90 using Str = std::basic_string<CharT>;
91 using traits_type =
typename Str::traits_type;
93 using key_type =
typename Str::value_type;
94 using key_compare = Char_Traits_Less_Than;
95 using value_type =
typename Str::value_type;
96 using value_compare = key_compare;
97 using allocator_type =
typename Str::allocator_type;
98 using pointer =
typename Str::pointer;
99 using const_pointer =
typename Str::const_pointer;
100 using reference =
typename Str::reference;
101 using const_reference =
typename Str::const_reference;
102 using size_type =
typename Str::size_type;
103 using difference_type =
typename Str::difference_type;
104 using iterator =
typename Str::iterator;
105 using const_iterator =
typename Str::const_iterator;
106 using reverse_iterator =
typename Str::reverse_iterator;
107 using const_reverse_iterator =
typename Str::const_reverse_iterator;
109 String_Set() =
default;
110 explicit String_Set(
const Str& s) : d(s) { sort_uniq(); }
111 explicit String_Set(Str&& s) : d(move(s)) { sort_uniq(); }
112 explicit String_Set(
const CharT* s) : d(s) { sort_uniq(); }
113 template <
class InputIterator>
114 String_Set(InputIterator first, InputIterator last) : d(first, last)
118 String_Set(std::initializer_list<value_type> il) : d(il)
123 auto& operator=(
const Str& s)
129 auto& operator=(Str&& s)
135 auto& operator=(std::initializer_list<value_type> il)
141 auto& operator=(
const CharT* s)
149 auto data() const noexcept -> const_pointer {
return d.data(); }
150 operator const Str&()
const noexcept {
return d; }
151 auto& str() const noexcept {
return d; }
154 iterator begin() noexcept {
return d.begin(); }
155 const_iterator begin() const noexcept {
return d.begin(); }
156 iterator end() noexcept {
return d.end(); }
157 const_iterator end() const noexcept {
return d.end(); }
159 reverse_iterator rbegin() noexcept {
return d.rbegin(); }
160 const_reverse_iterator rbegin() const noexcept {
return d.rbegin(); }
161 reverse_iterator rend() noexcept {
return d.rend(); }
162 const_reverse_iterator rend() const noexcept {
return d.rend(); }
164 const_iterator cbegin() const noexcept {
return d.cbegin(); }
165 const_iterator cend() const noexcept {
return d.cend(); }
166 const_reverse_iterator crbegin() const noexcept {
return d.crbegin(); }
167 const_reverse_iterator crend() const noexcept {
return d.crend(); }
170 bool empty() const noexcept {
return d.empty(); }
171 size_type size() const noexcept {
return d.size(); }
172 size_type max_size() const noexcept {
return d.max_size(); }
175 std::pair<iterator, bool> insert(value_type x)
177 auto it = lower_bound(x);
178 if (it != end() && *it == x) {
181 auto ret = d.insert(it, x);
184 iterator insert(iterator hint, value_type x)
186 if (hint == end() || traits_type::lt(x, *hint)) {
187 if (hint == begin() ||
188 traits_type::lt(*(hint - 1), x)) {
189 return d.insert(hint, x);
192 return insert(x).first;
196 template <
class InputIterator>
197 void insert(InputIterator first, InputIterator last)
199 d.insert(end(), first, last);
202 void insert(std::initializer_list<value_type> il)
207 template <
class... Args>
208 std::pair<iterator, bool> emplace(Args&&... args)
210 return insert(CharT(args...));
213 template <
class... Args>
214 iterator emplace_hint(iterator hint, Args&&... args)
216 return insert(hint, CharT(args...));
219 iterator erase(iterator position) {
return d.erase(position); }
220 size_type erase(key_type x)
229 iterator erase(iterator first, iterator last)
231 return d.erase(first, last);
233 void swap(String_Set& s) { d.swap(s.d); }
234 void clear() noexcept { d.clear(); }
237 auto insert(
const Str& s) ->
void
242 auto& operator+=(
const Str& s)
249 key_compare key_comp()
const {
return Char_Traits_Less_Than(); }
250 value_compare value_comp()
const {
return key_comp(); }
254 auto lookup(key_type x)
const
263 iterator find(key_type x) {
return begin() + lookup(x); }
264 const_iterator find(key_type x)
const {
return begin() + lookup(x); }
265 size_type count(key_type x)
const {
return d.find(x) != d.npos; }
267 iterator lower_bound(key_type x)
269 return std::lower_bound(begin(), end(), x, traits_type::lt);
272 const_iterator lower_bound(key_type x)
const
274 return std::lower_bound(begin(), end(), x, traits_type::lt);
276 iterator upper_bound(key_type x)
278 return std::upper_bound(begin(), end(), x, traits_type::lt);
281 const_iterator upper_bound(key_type x)
const
283 return std::upper_bound(begin(), end(), x, traits_type::lt);
285 std::pair<iterator, iterator> equal_range(key_type x)
287 return std::equal_range(begin(), end(), x, traits_type::lt);
290 std::pair<const_iterator, const_iterator> equal_range(key_type x)
const
292 return std::equal_range(begin(), end(), x, traits_type::lt);
296 bool contains(key_type x)
const {
return count(x); }
299 bool operator<(
const String_Set& rhs)
const {
return d < rhs.d; }
300 bool operator<=(
const String_Set& rhs)
const {
return d <= rhs.d; }
301 bool operator==(
const String_Set& rhs)
const {
return d == rhs.d; }
302 bool operator!=(
const String_Set& rhs)
const {
return d != rhs.d; }
303 bool operator>=(
const String_Set& rhs)
const {
return d >= rhs.d; }
304 bool operator>(
const String_Set& rhs)
const {
return d > rhs.d; }
307template <
class CharT>
308auto swap(String_Set<CharT>& a, String_Set<CharT>& b)
313using Flag_Set = String_Set<char16_t>;
315class Substr_Replacer {
317 using Str = std::string;
318 using Str_View = std::string_view;
319 using Pair_Str = std::pair<Str, Str>;
320 using Table_Pairs = std::vector<Pair_Str>;
324 auto sort_uniq() -> void;
325 auto find_match(Str_View s)
const;
328 Substr_Replacer() =
default;
329 explicit Substr_Replacer(
const Table_Pairs& v) : table(v)
333 explicit Substr_Replacer(Table_Pairs&& v) : table(std::move(v))
338 auto& operator=(
const Table_Pairs& v)
344 auto& operator=(Table_Pairs&& v)
346 table = std::move(v);
351 auto replace(Str& s)
const -> Str&;
352 auto replace_copy(Str s)
const -> Str
359auto inline Substr_Replacer::sort_uniq() ->
void
361 auto first = begin(table);
362 auto last = end(table);
363 sort(first, last, [](
auto& a,
auto& b) {
return a.first < b.first; });
364 auto it = unique(first, last,
365 [](
auto& a,
auto& b) {
return a.first == b.first; });
366 table.erase(it, last);
369 if (!table.empty() && table.front().first.empty())
370 table.erase(begin(table));
373auto inline Substr_Replacer::find_match(Str_View s)
const
376 struct Comparer_Str_Rep {
377 auto static cmp_prefix_of(
const Str& p, Str_View of)
379 return p.compare(0, p.npos, of.data(),
380 std::min(p.size(), of.size()));
382 auto operator()(
const Pair_Str& a, Str_View b)
384 return cmp_prefix_of(a.first, b) < 0;
386 auto operator()(Str_View a,
const Pair_Str& b)
388 return cmp_prefix_of(b.first, a) > 0;
390 auto static eq(
const Pair_Str& a, Str_View b)
392 return cmp_prefix_of(a.first, b) == 0;
395 Comparer_Str_Rep csr;
397 auto last_match = end(t);
399 auto it2 = upper_bound(it, end(t), s, csr);
405 if (csr.eq(*it2, s)) {
419auto inline Substr_Replacer::replace(Str& s)
const -> Str&
424 for (
size_t i = 0; i < s.size(); ) {
425 auto substr = Str_View(&s[i], s.size() - i);
426 auto it = find_match(substr);
427 if (it != end(table)) {
431 s.replace(i, match.first.size(), match.second);
432 i += match.second.size();
442 using Str = std::string;
443 using Table_Str = std::vector<Str>;
444 using iterator =
typename Table_Str::iterator;
445 using const_iterator =
typename Table_Str::const_iterator;
449 size_t start_word_breaks_last_idx = 0;
450 size_t end_word_breaks_last_idx = 0;
452 auto order_entries() -> void;
455 Break_Table() =
default;
456 explicit Break_Table(
const Table_Str& v) : table(v) { order_entries(); }
457 explicit Break_Table(Table_Str&& v) : table(std::move(v))
462 auto& operator=(
const Table_Str& v)
469 auto& operator=(Table_Str&& v)
471 table = std::move(v);
476 auto start_word_breaks() const -> Subrange<const_iterator>
478 return {begin(table),
479 begin(table) + start_word_breaks_last_idx};
481 auto end_word_breaks() const -> Subrange<const_iterator>
483 return {begin(table) + start_word_breaks_last_idx,
484 begin(table) + end_word_breaks_last_idx};
486 auto middle_word_breaks() const -> Subrange<const_iterator>
488 return {begin(table) + end_word_breaks_last_idx, end(table)};
491auto inline Break_Table::order_entries() ->
void
493 auto it = remove_if(begin(table), end(table), [](
auto& s) {
495 (s.size() == 1 && (s[0] ==
'^' || s[0] ==
'$'));
497 table.erase(it, end(table));
499 auto is_start_word_break = [=](
auto& x) {
return x[0] ==
'^'; };
500 auto is_end_word_break = [=](
auto& x) {
return x.back() ==
'$'; };
501 auto start_word_breaks_last =
502 partition(begin(table), end(table), is_start_word_break);
503 start_word_breaks_last_idx = start_word_breaks_last - begin(table);
505 for_each(begin(table), start_word_breaks_last,
506 [](
auto& e) { e.erase(0, 1); });
508 auto end_word_breaks_last =
509 partition(start_word_breaks_last, end(table), is_end_word_break);
510 end_word_breaks_last_idx = end_word_breaks_last - begin(table);
512 for_each(start_word_breaks_last, end_word_breaks_last,
513 [](
auto& e) { e.pop_back(); });
518 constexpr auto operator()(T&& t)
const noexcept -> T&&
520 return std::forward<T>(t);
524template <
class Key,
class T>
526 using bucket_type = std::forward_list<std::pair<Key, T>>;
527 static constexpr float max_load_fact = 7.0 / 8.0;
528 std::vector<bucket_type> data;
530 size_t max_load_factor_capacity = 0;
533 using key_type = Key;
534 using mapped_type = T;
535 using value_type = std::pair<Key, T>;
536 using size_type = std::size_t;
537 using difference_type = std::ptrdiff_t;
538 using hasher = std::hash<Key>;
539 using reference = value_type&;
540 using const_reference =
const value_type&;
541 using pointer =
typename bucket_type::pointer;
542 using const_pointer =
typename bucket_type::const_pointer;
543 using local_iterator =
typename bucket_type::iterator;
544 using local_const_iterator =
typename bucket_type::const_iterator;
546 Hash_Multimap() =
default;
548 auto size() const noexcept {
return sz; }
549 auto empty() const noexcept {
return size() == 0; }
551 auto rehash(
size_t count)
554 size_t capacity = 16;
555 while (capacity <= count)
557 data.resize(capacity);
558 max_load_factor_capacity =
559 std::ceil(capacity * max_load_fact);
562 if (count < size() / max_load_fact)
563 count = size() / max_load_fact;
564 auto n = Hash_Multimap();
566 for (
auto& b : data) {
568 n.insert(std::move(x));
573 max_load_factor_capacity = n.max_load_factor_capacity;
576 auto reserve(
size_t count) ->
void
578 rehash(std::ceil(count / max_load_fact));
582 auto prepare_for_insert(
const key_type& key)
583 -> std::pair<bucket_type&, local_iterator>
585 auto hash = hasher();
586 if (sz == max_load_factor_capacity) {
590 auto h_mod = h & (data.size() - 1);
591 auto& bucket = data[h_mod];
592 auto prev = bucket.before_begin();
593 auto curr = begin(bucket);
595 for (; curr != end(bucket); prev = curr++) {
596 if (curr->first == key)
600 for (; curr != end(bucket); prev = curr++) {
601 if (curr->first != key)
605 return {bucket, prev};
609 auto insert(value_type&& value)
611 auto& key = value.first;
612 auto [bucket, it] = prepare_for_insert(key);
613 return bucket.insert_after(it, move(value));
615 template <
class... Args>
616 auto emplace(Args&&... a)
618 auto node = bucket_type();
619 auto& val = node.emplace_front(std::forward<Args>(a)...);
620 auto& key = val.first;
621 auto [bucket, it] = prepare_for_insert(key);
622 bucket.splice_after(it, node, node.before_begin());
626 auto equal_range(
const key_type& key)
const
627 -> std::pair<local_const_iterator, local_const_iterator>
629 auto hash = hasher();
633 auto h_mod = h & (data.size() - 1);
634 auto& bucket = data[h_mod];
635 auto eq_key = [&](
auto& x) {
return key == x.first; };
636 auto first = std::find_if(begin(bucket), end(bucket), eq_key);
637 if (first == end(bucket))
638 return {first, first};
639 auto last = std::find_if_not(next(first), end(bucket), eq_key);
640 return {first, last};
643 auto bucket_count() const -> size_type {
return data.size(); }
644 auto bucket_data(size_type i)
const {
return Subrange(data[i]); }
647struct Condition_Exception :
public std::runtime_error {
648 using std::runtime_error::runtime_error;
656 using Str = std::string;
657 using Str_View = std::string_view;
662 auto construct() -> void;
665 Condition() =
default;
666 explicit Condition(
const Str& condition) : cond(condition)
670 explicit Condition(Str&& condition) : cond(std::move(condition))
674 explicit Condition(
const char* condition) : cond(condition)
678 auto& operator=(
const Str& condition)
685 auto& operator=(Str&& condition)
687 cond = std::move(condition);
692 auto& operator=(
const char* condition)
699 auto& str()
const {
return cond; }
700 auto match_prefix(Str_View s)
const -> bool;
701 auto match_suffix(Str_View s)
const
703 auto cp_cnt = size_t(0);
705 for (; cp_cnt != num_cp && i != 0; ++cp_cnt) {
706 valid_u8_reverse_index(s, i);
708 if (cp_cnt != num_cp)
710 return match_prefix(s.substr(i));
713auto inline Condition::construct() ->
void
715 for (
size_t i = 0; i != size(cond);) {
716 size_t j = cond.find_first_of(
"[].", i);
720 valid_u8_advance_index(cond, i);
725 if (cond[i] ==
'.') {
730 else if (cond[i] ==
']') {
732 "closing bracket has no matching opening bracket";
733 throw Condition_Exception(what);
735 else if (cond[i] ==
'[') {
737 if (i == size(cond)) {
738 auto what =
"opening bracket has no matching "
740 throw Condition_Exception(what);
744 j = cond.find(
']', i);
746 auto what =
"empty bracket expression";
747 throw Condition_Exception(what);
749 if (j == cond.npos) {
750 auto what =
"opening bracket has no matching "
752 throw Condition_Exception(what);
760auto inline Condition::match_prefix(Str_View s)
const ->
bool
762 if (size(s) < num_cp)
765 auto s_i = size_t(0);
766 auto cond_i = size_t(0);
767 for (; s_i != size(s) && cond_i != size(cond); ++cond_i) {
769 auto cond_cu = cond[cond_i];
777 valid_u8_advance_index(s, s_i);
781 cond_cu = cond[cond_i];
785 valid_u8_advance_cp(s, s_i, str_cp);
787 while (cond[cond_i] !=
']') {
789 valid_u8_advance_cp(cond, cond_i, cond_cp);
790 if (str_cp == cond_cp)
793 if (cond_cu !=
'^' && !found)
795 if (cond_cu ==
'^' && found)
800 return cond_i == size(cond);
804 using Str = std::string;
807 bool cross_product =
false;
813 auto to_root(Str& word)
const -> Str&
815 return word.replace(0, appending.size(), stripping);
817 auto to_root_copy(Str word)
const -> Str
823 auto to_derived(Str& word)
const -> Str&
825 return word.replace(0, stripping.size(), appending);
827 auto to_derived_copy(Str word)
const -> Str
833 auto check_condition(
const Str& word)
const ->
bool
835 return condition.match_prefix(word);
840 using Str = std::string;
843 bool cross_product =
false;
849 auto to_root(Str& word)
const -> Str&
851 return word.replace(word.size() - appending.size(),
852 appending.size(), stripping);
854 auto to_root_copy(Str word)
const -> Str
860 auto to_derived(Str& word)
const -> Str&
862 return word.replace(word.size() - stripping.size(),
863 stripping.size(), appending);
865 auto to_derived_copy(Str word)
const -> Str
871 auto check_condition(
const Str& word)
const ->
bool
873 return condition.match_suffix(word);
877template <
class T,
class Key_Extr =
identity,
class Key_Transform =
identity>
878class Prefix_Multiset {
880 using Value_Type = T;
882 std::remove_reference_t<decltype(std::declval<Key_Extr>()(
883 std::declval<T>()))>;
884 using Char_Type =
typename Key_Type::value_type;
885 using Traits =
typename Key_Type::traits_type;
886 using Vector_Type = std::vector<T>;
887 using Iterator =
typename Vector_Type::const_iterator;
890 struct Ebo_Key_Extr :
public Key_Extr {};
891 struct Ebo_Key_Transf :
public Key_Transform {};
892 struct Ebo :
public Ebo_Key_Extr, Ebo_Key_Transf {
895 std::basic_string<Char_Type> first_letter;
896 std::vector<size_t> prefix_idx_with_first_letter;
898 auto key_extractor() const -> const Ebo_Key_Extr& {
return ebo; }
899 auto key_transformator() const -> const Ebo_Key_Transf& {
return ebo; }
900 auto& get_table() {
return ebo.table; }
901 auto& get_table()
const {
return ebo.table; }
905 auto& extract_key = key_extractor();
906 auto& transform_key = key_transformator();
907 auto& table = get_table();
909 std::sort(begin(table), end(table), [&](T& a, T& b) {
910 auto&& key_a = transform_key(extract_key(a));
911 auto&& key_b = transform_key(extract_key(b));
912 return key_a < key_b;
915 first_letter.clear();
916 prefix_idx_with_first_letter.clear();
917 auto first = begin(table);
918 auto last = end(table);
919 auto it = std::find_if_not(first, last, [=](
const T& x) {
920 auto&& k = transform_key(extract_key(x));
924 auto&& k1 = transform_key(extract_key(*it));
925 first_letter.push_back(k1[0]);
926 prefix_idx_with_first_letter.push_back(it - first);
928 it = std::upper_bound(
930 Comparator{0, extract_key, transform_key});
932 if (!prefix_idx_with_first_letter.empty())
933 prefix_idx_with_first_letter.push_back(last - first);
938 Key_Extr extract_key;
939 Key_Transform transform_key;
940 auto operator()(
const T& a, Char_Type b)
const
942 auto&& key_a = transform_key(extract_key(a));
943 return Traits::lt(key_a[i], b);
945 auto operator()(Char_Type a,
const T& b)
const
947 auto&& key_b = transform_key(extract_key(b));
948 return Traits::lt(a, key_b[i]);
953 Prefix_Multiset() =
default;
954 explicit Prefix_Multiset(Key_Extr ke, Key_Transform kt = {})
955 : ebo{{ke}, {kt}, {}}
958 explicit Prefix_Multiset(
const Vector_Type& v, Key_Extr ke = {},
959 Key_Transform kt = {})
964 explicit Prefix_Multiset(Vector_Type&& v, Key_Extr ke = {},
965 Key_Transform kt = {})
966 : ebo{{ke}, {kt}, std::move(v)}
970 Prefix_Multiset(std::initializer_list<T> list, Key_Extr ke = {},
971 Key_Transform kt = {})
972 : ebo{{ke}, {kt}, list}
976 auto& operator=(
const Vector_Type& v)
982 auto& operator=(Vector_Type&& v)
984 get_table() = std::move(v);
988 auto& operator=(std::initializer_list<T> list)
994 auto& data()
const {
return get_table(); }
996 template <
class Func>
997 auto for_each_prefixes_of(
const Key_Type& word, Func func)
const;
999 template <
class OutIter>
1000 auto copy_all_prefixes_of(
const Key_Type& word, OutIter o)
const
1002 for_each_prefixes_of(word, [&](
const T& x) { *o++ = x; });
1006 class Iter_Prefixes_Of {
1007 const Prefix_Multiset* set = {};
1010 const Key_Type* search_key = {};
1014 auto advance() -> void;
1017 using iterator_category = std::input_iterator_tag;
1018 using value_type = T;
1019 using size_type = size_t;
1020 using difference_type = ptrdiff_t;
1021 using reference =
const T&;
1022 using pointer =
const T*;
1024 Iter_Prefixes_Of() =
default;
1025 Iter_Prefixes_Of(
const Prefix_Multiset& set,
1026 const Key_Type& word)
1027 : set(&set), it(set.get_table().begin()),
1028 last(set.get_table().end()), search_key(&word),
1033 Iter_Prefixes_Of(
const Prefix_Multiset&, Key_Type&&) =
delete;
1034 Iter_Prefixes_Of(Prefix_Multiset&&,
const Key_Type&) =
delete;
1035 Iter_Prefixes_Of(Prefix_Multiset&&, Key_Type&&) =
delete;
1043 auto& operator++(
int)
1049 auto& operator*()
const {
return *it; }
1050 auto operator->()
const {
return &*it; }
1051 auto operator==(
const Iter_Prefixes_Of& other)
const
1053 return valid == other.valid;
1055 auto operator!=(
const Iter_Prefixes_Of& other)
const
1057 return valid != other.valid;
1060 operator bool()
const {
return valid; }
1062 auto begin()
const {
return *
this; }
1063 auto end()
const {
return Iter_Prefixes_Of(); }
1066 auto iterate_prefixes_of(
const Key_Type& word)
const
1068 return Iter_Prefixes_Of(*
this, word);
1070 auto iterate_prefixes_of(Key_Type&& word)
const =
delete;
1072template <
class T,
class Key_Extr,
class Key_Transform>
1073auto Prefix_Multiset<T, Key_Extr, Key_Transform>::Iter_Prefixes_Of::advance()
1076 auto& extract_key = set->key_extractor();
1077 auto& transform_key = set->key_transformator();
1078 auto& table = set->get_table();
1082 if (transform_key(extract_key(*it)).empty())
1084 if (transform_key(*search_key).empty()) {
1089 auto& first_letter = set->first_letter;
1090 auto& prefix_idx_with_first_letter =
1091 set->prefix_idx_with_first_letter;
1093 first_letter.find(transform_key(*search_key)[0]);
1094 if (idx == first_letter.npos) {
1099 auto first = table.begin();
1100 it = first + prefix_idx_with_first_letter[idx];
1101 last = first + prefix_idx_with_first_letter[idx + 1];
1111 if (transform_key(extract_key(*it)).size() == len)
1113 if (len == transform_key(*search_key).size()) {
1123 equal_range(it, last, transform_key(*search_key)[len],
1124 Comparator{len, extract_key, transform_key});
1128template <
class T,
class Key_Extr,
class Key_Transform>
1129template <
class Func>
1130auto Prefix_Multiset<T, Key_Extr, Key_Transform>::for_each_prefixes_of(
1131 const Key_Type& word, Func func)
const
1133 auto& extract_key = key_extractor();
1134 auto& transform_key = key_transformator();
1135 auto& table = get_table();
1137 auto first = begin(table);
1138 auto last = end(table);
1140 for (; it != last; ++it) {
1141 auto&& k = transform_key(extract_key(*it));
1150 if (transform_key(word).empty())
1153 auto idx = first_letter.find(transform_key(word)[0]);
1154 if (idx == first_letter.npos)
1156 first = begin(table) + prefix_idx_with_first_letter[idx];
1157 last = begin(table) + prefix_idx_with_first_letter[idx + 1];
1159 for (
size_t len = 1;; ++len) {
1161 for (; it != last &&
1162 transform_key(extract_key(*it)).size() == len;
1168 if (len == transform_key(word).size())
1171 equal_range(it, last, transform_key(word)[len],
1172 Comparator{len, extract_key, transform_key});
1176template <
class CharT>
1177class Reversed_String_View {
1179 using Str = std::basic_string<CharT>;
1180 using traits_type =
typename Str::traits_type;
1181 using value_type =
typename Str::value_type;
1182 using size_type =
typename Str::size_type;
1183 using const_iterator =
typename Str::const_reverse_iterator;
1186 const_iterator first = {};
1190 Reversed_String_View() =
default;
1191 explicit Reversed_String_View(
const Str& s)
1192 : first(s.rbegin()), sz(s.size())
1195 Reversed_String_View(Str&& s) =
delete;
1196 auto& operator[](size_type i)
const {
return first[i]; }
1197 auto size()
const {
return sz; }
1198 auto empty()
const {
return sz == 0; }
1199 auto begin()
const {
return first; }
1200 auto end()
const {
return first + sz; }
1201 auto operator<(Reversed_String_View other)
const
1203 return lexicographical_compare(begin(), end(), other.begin(),
1204 other.end(), traits_type::lt);
1208template <
class CharT>
1209struct String_Reverser {
1210 auto operator()(
const std::basic_string<CharT>& x)
const
1212 return Reversed_String_View<CharT>(x);
1217template <
class AffixT>
1218struct Extractor_Of_Appending_From_Affix {
1219 auto& operator()(
const AffixT& a)
const {
return a.appending; }
1222template <
class T,
class Key_Extr =
identity>
1223using Suffix_Multiset = Prefix_Multiset<
1225 String_Reverser<typename Prefix_Multiset<T, Key_Extr>::Char_Type>>;
1228 using Prefix_Multiset_Type =
1229 Prefix_Multiset<Prefix, Extractor_Of_Appending_From_Affix<Prefix>>;
1230 using Key_Type =
typename Prefix_Multiset_Type::Key_Type;
1231 using Vector_Type =
typename Prefix_Multiset_Type::Vector_Type;
1232 Prefix_Multiset_Type table;
1233 Flag_Set all_cont_flags;
1237 for (
auto& x : table.data())
1238 all_cont_flags += x.cont_flags;
1242 Prefix_Table() =
default;
1243 explicit Prefix_Table(
const Vector_Type& t) : table(t) { populate(); }
1244 explicit Prefix_Table(Vector_Type&& t) : table(std::move(t))
1248 auto& operator=(
const Vector_Type& t)
1254 auto& operator=(Vector_Type&& t)
1256 table = std::move(t);
1260 auto begin()
const {
return table.data().begin(); }
1261 auto end()
const {
return table.data().end(); }
1262 auto size()
const {
return table.data().size(); }
1264 auto has_continuation_flags()
const
1266 return all_cont_flags.size() != 0;
1268 auto has_continuation_flag(
char16_t flag)
const
1270 return all_cont_flags.contains(flag);
1272 auto iterate_prefixes_of(
const Key_Type& word)
const
1274 return table.iterate_prefixes_of(word);
1276 auto iterate_prefixes_of(Key_Type&& word)
const =
delete;
1280 using Suffix_Multiset_Type =
1281 Suffix_Multiset<Suffix, Extractor_Of_Appending_From_Affix<Suffix>>;
1282 using Key_Type =
typename Suffix_Multiset_Type::Key_Type;
1283 using Vector_Type =
typename Suffix_Multiset_Type::Vector_Type;
1284 Suffix_Multiset_Type table;
1285 Flag_Set all_cont_flags;
1289 for (
auto& x : table.data())
1290 all_cont_flags += x.cont_flags;
1294 Suffix_Table() =
default;
1295 explicit Suffix_Table(
const Vector_Type& t) : table(t) { populate(); }
1296 explicit Suffix_Table(Vector_Type&& t) : table(std::move(t))
1300 auto& operator=(
const Vector_Type& t)
1306 auto& operator=(Vector_Type&& t)
1308 table = std::move(t);
1312 auto begin()
const {
return table.data().begin(); }
1313 auto end()
const {
return table.data().end(); }
1314 auto size()
const {
return table.data().size(); }
1316 auto has_continuation_flags()
const
1318 return all_cont_flags.size() != 0;
1320 auto has_continuation_flag(
char16_t flag)
const
1322 return all_cont_flags.contains(flag);
1324 auto iterate_suffixes_of(
const Key_Type& word)
const
1326 return table.iterate_prefixes_of(word);
1328 auto iterate_suffixes_of(Key_Type&& word)
const =
delete;
1336 using Str = std::string;
1337 using Str_View = std::string_view;
1342 String_Pair() =
default;
1343 template <
class Str1>
1344 explicit String_Pair(Str1&& str,
size_t i)
1345 : i(i), s(std::forward<Str1>(str))
1348 throw std::out_of_range(
"word split is too long");
1351 template <
class Str1,
class Str2,
1352 class = std::enable_if_t<
1353 std::is_same<std::remove_reference_t<Str1>, Str>::value &&
1354 std::is_same<std::remove_reference_t<Str2>, Str>::value>>
1355 String_Pair(Str1&& first, Str2&& second)
1359 s(std::forward<Str1>(first) + std::forward<Str2>(second))
1363 auto first()
const {
return Str_View(s).substr(0, i); }
1364 auto second()
const {
return Str_View(s).substr(i); }
1365 auto first(Str_View x)
1370 auto second(Str_View x) { s.replace(i, s.npos, x); }
1371 auto& str()
const {
return s; }
1372 auto idx()
const {
return i; }
1374struct Compound_Pattern {
1375 String_Pair begin_end_chars;
1376 std::string replacement;
1377 char16_t first_word_flag = 0;
1378 char16_t second_word_flag = 0;
1379 bool match_first_only_unaffixed_or_zero_affixed =
false;
1382class Compound_Rule_Table {
1383 std::vector<std::u16string> rules;
1386 auto fill_all_flags() -> void;
1389 Compound_Rule_Table() =
default;
1390 explicit Compound_Rule_Table(
const std::vector<std::u16string>& tbl)
1395 explicit Compound_Rule_Table(std::vector<std::u16string>&& tbl)
1396 : rules(std::move(tbl))
1400 auto& operator=(
const std::vector<std::u16string>& tbl)
1406 auto& operator=(std::vector<std::u16string>&& tbl)
1408 rules = std::move(tbl);
1412 auto empty()
const {
return rules.empty(); }
1413 auto has_any_of_flags(
const Flag_Set& f)
const -> bool;
1414 auto match_any_rule(
const std::vector<const Flag_Set*>& data)
const
1417auto inline Compound_Rule_Table::fill_all_flags() ->
void
1419 for (
auto& f : rules) {
1422 all_flags.erase(u
'?');
1423 all_flags.erase(u
'*');
1426auto inline Compound_Rule_Table::has_any_of_flags(
const Flag_Set& f)
const
1431 struct Out_Iter_One_Bool {
1432 bool* value =
nullptr;
1433 auto& operator++() {
return *
this; }
1434 auto& operator++(
int) {
return *
this; }
1435 auto& operator*() {
return *
this; }
1436 auto& operator=(
char16_t)
1442 auto has_intersection =
false;
1443 auto out_it = Out_Iter_One_Bool{&has_intersection};
1444 std::set_intersection(begin(all_flags), end(all_flags), begin(f),
1446 return has_intersection;
1449template <
class DataIter,
class PatternIter,
class FuncEq = std::equal_to<>>
1450auto match_simple_regex(DataIter data_first, DataIter data_last,
1451 PatternIter pat_first, PatternIter pat_last,
1452 FuncEq eq = FuncEq())
1454 auto s = std::stack<std::pair<DataIter, PatternIter>>();
1455 s.emplace(data_first, pat_first);
1456 auto data_it = DataIter();
1457 auto pat_it = PatternIter();
1458 while (!s.empty()) {
1459 std::tie(data_it, pat_it) = s.top();
1461 if (pat_it == pat_last) {
1462 if (data_it == data_last)
1467 auto node_type = *pat_it;
1468 if (pat_it + 1 == pat_last)
1471 node_type = *(pat_it + 1);
1472 switch (node_type) {
1474 s.emplace(data_it, pat_it + 2);
1475 if (data_it != data_last && eq(*data_it, *pat_it))
1476 s.emplace(data_it + 1, pat_it + 2);
1479 s.emplace(data_it, pat_it + 2);
1480 if (data_it != data_last && eq(*data_it, *pat_it))
1481 s.emplace(data_it + 1, pat_it);
1485 if (data_it != data_last && eq(*data_it, *pat_it))
1486 s.emplace(data_it + 1, pat_it + 1);
1493template <
class DataRange,
class PatternRange,
class FuncEq = std::equal_to<>>
1494auto match_simple_regex(
const DataRange& data,
const PatternRange& pattern,
1495 FuncEq eq = FuncEq())
1497 using namespace std;
1498 return match_simple_regex(begin(data), end(data), begin(pattern),
1502auto inline match_compund_rule(
const std::vector<const Flag_Set*>& words_data,
1503 const std::u16string& pattern)
1505 return match_simple_regex(
1506 words_data, pattern,
1507 [](
const Flag_Set* d,
char16_t p) {
return d->contains(p); });
1510auto inline Compound_Rule_Table::match_any_rule(
1511 const std::vector<const Flag_Set*>& data)
const ->
bool
1513 return any_of(begin(rules), end(rules), [&](
const std::u16string& p) {
1514 return match_compund_rule(data, p);
1518using List_Strings = std::vector<std::string>;
1520class Replacement_Table {
1522 using Str = std::string;
1523 using Table_Str = std::vector<std::pair<Str, Str>>;
1524 using iterator =
typename Table_Str::iterator;
1525 using const_iterator =
typename Table_Str::const_iterator;
1529 size_t whole_word_reps_last_idx = 0;
1530 size_t start_word_reps_last_idx = 0;
1531 size_t end_word_reps_last_idx = 0;
1533 auto order_entries() -> void;
1536 Replacement_Table() =
default;
1537 explicit Replacement_Table(
const Table_Str& v) : table(v)
1541 explicit Replacement_Table(Table_Str&& v) : table(std::move(v))
1546 auto& operator=(
const Table_Str& v)
1553 auto& operator=(Table_Str&& v)
1555 table = std::move(v);
1560 auto whole_word_replacements() const -> Subrange<const_iterator>
1562 return {begin(table), begin(table) + whole_word_reps_last_idx};
1564 auto start_word_replacements() const -> Subrange<const_iterator>
1566 return {begin(table) + whole_word_reps_last_idx,
1567 begin(table) + start_word_reps_last_idx};
1569 auto end_word_replacements() const -> Subrange<const_iterator>
1571 return {begin(table) + start_word_reps_last_idx,
1572 begin(table) + end_word_reps_last_idx};
1574 auto any_place_replacements() const -> Subrange<const_iterator>
1576 return {begin(table) + end_word_reps_last_idx, end(table)};
1579auto inline Replacement_Table::order_entries() ->
void
1581 auto it = remove_if(begin(table), end(table), [](
auto& p) {
1584 (s.size() == 1 && (s[0] ==
'^' || s[0] ==
'$'));
1586 table.erase(it, end(table));
1588 auto is_start_word_pat = [=](
auto& x) {
return x.first[0] ==
'^'; };
1589 auto is_end_word_pat = [=](
auto& x) {
return x.first.back() ==
'$'; };
1591 auto start_word_reps_last =
1592 partition(begin(table), end(table), is_start_word_pat);
1593 start_word_reps_last_idx = start_word_reps_last - begin(table);
1594 for_each(begin(table), start_word_reps_last,
1595 [](
auto& e) { e.first.erase(0, 1); });
1597 auto whole_word_reps_last =
1598 partition(begin(table), start_word_reps_last, is_end_word_pat);
1599 whole_word_reps_last_idx = whole_word_reps_last - begin(table);
1600 for_each(begin(table), whole_word_reps_last,
1601 [](
auto& e) { e.first.pop_back(); });
1603 auto end_word_reps_last =
1604 partition(start_word_reps_last, end(table), is_end_word_pat);
1605 end_word_reps_last_idx = end_word_reps_last - begin(table);
1606 for_each(start_word_reps_last, end_word_reps_last,
1607 [](
auto& e) { e.first.pop_back(); });
1610struct Similarity_Group {
1611 using Str = std::string;
1613 std::vector<Str> strings;
1615 auto parse(
const Str& s) -> void;
1616 Similarity_Group() =
default;
1617 explicit Similarity_Group(
const Str& s) { parse(s); }
1618 auto& operator=(
const Str& s)
1624auto inline Similarity_Group::parse(
const Str& s) ->
void
1628 auto j = s.find(
'(', i);
1629 chars.append(s, i, j - i);
1640 strings.push_back(s.substr(i, len));
1645class Phonetic_Table {
1646 using Char_Type = char;
1647 using Str = std::string;
1648 using Pair_Str = std::pair<Str, Str>;
1650 struct Phonet_Match_Result {
1651 size_t count_matched = 0;
1652 size_t go_back_before_replace = 0;
1653 size_t priority = 5;
1654 bool go_back_after_replace =
false;
1655 bool treat_next_as_begin =
false;
1656 operator bool() {
return count_matched; }
1659 std::vector<std::pair<Str, Str>> table;
1660 auto order() -> void;
1661 auto static match(
const Str& data,
size_t i,
const Str& pattern,
1662 bool at_begin) -> Phonet_Match_Result;
1665 Phonetic_Table() =
default;
1666 explicit Phonetic_Table(
const std::vector<Pair_Str>& v) : table(v)
1670 explicit Phonetic_Table(std::vector<Pair_Str>&& v) : table(std::move(v))
1674 auto& operator=(
const std::vector<Pair_Str>& v)
1680 auto& operator=(std::vector<Pair_Str>&& v)
1682 table = std::move(v);
1686 auto replace(Str& word)
const -> bool;
1689auto inline Phonetic_Table::order() ->
void
1691 stable_sort(begin(table), end(table), [](
auto& pair1,
auto& pair2) {
1692 if (pair2.first.empty())
1694 if (pair1.first.empty())
1696 return pair1.first[0] < pair2.first[0];
1698 auto it = find_if_not(begin(table), end(table),
1699 [](
auto& p) {
return p.first.empty(); });
1700 table.erase(begin(table), it);
1701 for (
auto& r : table) {
1702 if (r.second ==
"_")
1707auto inline Phonetic_Table::match(
const Str& data,
size_t i,
const Str& pattern,
1708 bool at_begin) -> Phonet_Match_Result
1710 auto ret = Phonet_Match_Result();
1711 auto j = pattern.find_first_of(
"(<-0123456789^$");
1712 if (j == pattern.npos)
1714 if (data.compare(i, j, pattern, 0, j) == 0)
1715 ret.count_matched = j;
1718 if (j == pattern.size())
1720 if (pattern[j] ==
'(') {
1721 auto k = pattern.find(
')', j);
1722 if (k == pattern.npos)
1724 auto x = std::char_traits<Char_Type>::find(
1725 &pattern[j + 1], k - (j + 1), data[i + j]);
1729 ret.count_matched += 1;
1731 if (j == pattern.size())
1733 if (pattern[j] ==
'<') {
1734 ret.go_back_after_replace =
true;
1737 auto k = pattern.find_first_not_of(
'-', j);
1738 if (k == pattern.npos) {
1740 ret.go_back_before_replace = k - j;
1741 if (ret.go_back_before_replace >= ret.count_matched)
1746 ret.go_back_before_replace = k - j;
1747 if (ret.go_back_before_replace >= ret.count_matched)
1751 if (pattern[j] >=
'0' && pattern[j] <=
'9') {
1752 ret.priority = pattern[j] -
'0';
1755 if (j == pattern.size())
1757 if (pattern[j] ==
'^') {
1762 if (j == pattern.size())
1764 if (pattern[j] ==
'^') {
1765 ret.treat_next_as_begin =
true;
1768 if (j == pattern.size())
1770 if (pattern[j] !=
'$')
1772 if (i + ret.count_matched == data.size())
1777auto inline Phonetic_Table::replace(Str& word)
const ->
bool
1780 auto operator()(Char_Type c,
const Pair_Str& s)
1782 return c < s.first[0];
1784 auto operator()(
const Pair_Str& s, Char_Type c)
1786 return s.first[0] < c;
1792 auto treat_next_as_begin =
true;
1793 size_t count_go_backs_after_replace = 0;
1794 for (
size_t i = 0; i != word.size(); ++i) {
1796 equal_range(begin(table), end(table), word[i], Cmp());
1797 for (
auto& r : Subrange(rules)) {
1799 auto m1 = match(word, i, r.first, treat_next_as_begin);
1802 if (!m1.go_back_before_replace) {
1803 auto j = i + m1.count_matched - 1;
1804 auto rules2 = equal_range(
1805 begin(table), end(table), word[j], Cmp());
1806 for (
auto& r2 : Subrange(rules2)) {
1808 match(word, j, r2.first,
false);
1809 if (m2 && m2.priority >= m1.priority) {
1818 i, m1.count_matched - m1.go_back_before_replace,
1820 treat_next_as_begin = m1.treat_next_as_begin;
1821 if (m1.go_back_after_replace &&
1822 count_go_backs_after_replace < 100) {
1823 count_go_backs_after_replace++;
1826 i += rule->second.size();
1835NUSPELL_END_INLINE_NAMESPACE
Library main namespace.
Definition aff_data.cxx:33