Nuspell
spellchecker
checker.hxx
1 /* Copyright 2016-2021 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef NUSPELL_CHECKER_HXX
20 #define NUSPELL_CHECKER_HXX
21 
22 #include "aff_data.hxx"
23 
24 namespace nuspell {
25 inline namespace v5 {
26 
/**
 * @brief Selects which set of affix rules applies to the word being checked.
 *
 * Passed as a non-type template argument to the Checker member templates
 * (see the `m == FULL_WORD`, `m == AT_COMPOUND_BEGIN` and
 * `m == AT_COMPOUND_END` tests in Checker::affix_NOT_valid).
 *
 * Kept as an unscoped enum so the enumerators stay usable without
 * qualification, exactly as the rest of this header uses them.
 */
enum Affixing_Mode {
	FULL_WORD,
	AT_COMPOUND_BEGIN,
	AT_COMPOUND_END,
	AT_COMPOUND_MIDDLE
};
33 
35  Word_List::const_pointer root_word = {};
36 
37  operator Word_List::const_pointer() const { return root_word; }
38  auto& operator*() const { return *root_word; }
39  auto operator->() const { return root_word; }
40 };
41 
42 template <class T1 = void, class T2 = void>
44  const T1* a = {};
45  const T2* b = {};
46 
47  Affixing_Result() = default;
48  Affixing_Result(Word_List::const_reference r, const T1& a, const T2& b)
49  : Affixing_Result_Base{&r}, a{&a}, b{&b}
50  {
51  }
52 };
53 template <class T1>
55  const T1* a = {};
56 
57  Affixing_Result() = default;
58  Affixing_Result(Word_List::const_reference r, const T1& a)
59  : Affixing_Result_Base{&r}, a{&a}
60  {
61  }
62 };
63 
64 template <>
65 struct Affixing_Result<void, void> : Affixing_Result_Base {
66  Affixing_Result() = default;
67  Affixing_Result(Word_List::const_reference r) : Affixing_Result_Base{&r}
68  {
69  }
70 };
71 
73  Word_List::const_pointer word_entry = {};
74  unsigned char num_words_modifier = {};
75  signed char num_syllable_modifier = {};
77  operator Word_List::const_pointer() const { return word_entry; }
78  auto& operator*() const { return *word_entry; }
79  auto operator->() const { return word_entry; }
80 };
81 
/**
 * Word-checking routines declared on top of the affix data inherited from
 * Aff_Data. Every member function declared here is const.
 *
 * NOTE(review): this text is a numbered source listing with lines missing.
 * Wherever the embedded numbering skips (e.g. 124 -> 126), the line that
 * carried a declaration's trailing return type and terminating ';' is not
 * present, so most strip_* and check_compound* declarations below are
 * incomplete as shown. Restore them from the original header.
 */
82 struct Checker : public Aff_Data {
 // Strongly named bool used as a parameter type below; a value-initialised
 // argument ({}) equals FORBID_BAD_FORCEUCASE.
83  enum Forceucase : bool {
84  FORBID_BAD_FORCEUCASE = false,
85  ALLOW_BAD_FORCEUCASE = true
86  };
87 
 // Strongly named bool used as a parameter type below; a value-initialised
 // argument ({}) equals ACCEPT_HIDDEN_HOMONYM.
88  enum Hidden_Homonym : bool {
89  ACCEPT_HIDDEN_HOMONYM = false,
90  SKIP_HIDDEN_HOMONYM = true
91  };
 // Default constructor; see the inline comment for why the base is named
 // explicitly in the initializer list.
92  Checker()
93  : Aff_Data() // we explicitly do value init so content is zeroed
94  {
95  }
 // spell_* family: each takes the word by non-const reference.
 // NOTE(review): presumably the word is edited in place while casing
 // variants are tried; confirm in the implementation file.
96  auto spell_priv(std::string& s) const -> bool;
97  auto spell_break(std::string& s, size_t depth = 0) const -> bool;
98  auto spell_casing(std::string& s) const -> const Flag_Set*;
99  auto spell_casing_upper(std::string& s) const -> const Flag_Set*;
100  auto spell_casing_title(std::string& s) const -> const Flag_Set*;
101  auto spell_sharps(std::string& base, size_t n_pos = 0, size_t n = 0,
102  size_t rep = 0) const -> const Flag_Set*;
103 
 // Both return a pointer to a Flag_Set.
 // NOTE(review): presumably null when the word is rejected; confirm.
104  auto check_word(std::string& s, Forceucase allow_bad_forceucase = {},
105  Hidden_Homonym skip_hidden_homonym = {}) const
106  -> const Flag_Set*;
107  auto check_simple_word(std::string& word,
108  Hidden_Homonym skip_hidden_homonym = {}) const
109  -> const Flag_Set*;
110 
 // Affix predicates with deduced return types. The two affix_NOT_valid
 // overloads, outer_affix_NOT_valid and is_circumfix are defined after this
 // struct in the same header.
111  template <Affixing_Mode m>
112  auto affix_NOT_valid(const Prefix& a) const;
113  template <Affixing_Mode m>
114  auto affix_NOT_valid(const Suffix& a) const;
115  template <Affixing_Mode m, class AffixT>
116  auto outer_affix_NOT_valid(const AffixT& a) const;
117  template <class AffixT>
118  auto is_circumfix(const AffixT& a) const;
119  template <Affixing_Mode m>
120  auto is_valid_inside_compound(const Flag_Set& flags) const;
121 
 // strip_* family. Overloads taking only the word default the mode to
 // FULL_WORD (strip_pfx_then_sfx_comm_2 also has that default); the other
 // *_2 / *_3 overloads additionally take already-selected affix entries.
 // NOTE(review): the trailing return type line of each declaration from
 // here down to check_compound_with_pattern_replacements is missing.
122  template <Affixing_Mode m = FULL_WORD>
123  auto strip_prefix_only(std::string& s,
124  Hidden_Homonym skip_hidden_homonym = {}) const
126 
127  template <Affixing_Mode m = FULL_WORD>
128  auto strip_suffix_only(std::string& s,
129  Hidden_Homonym skip_hidden_homonym = {}) const
131 
132  template <Affixing_Mode m = FULL_WORD>
133  auto
134  strip_prefix_then_suffix(std::string& s,
135  Hidden_Homonym skip_hidden_homonym = {}) const
137 
138  template <Affixing_Mode m>
139  auto strip_pfx_then_sfx_2(const Prefix& pe, std::string& s,
140  Hidden_Homonym skip_hidden_homonym) const
142 
143  template <Affixing_Mode m = FULL_WORD>
144  auto
145  strip_suffix_then_prefix(std::string& s,
146  Hidden_Homonym skip_hidden_homonym = {}) const
148 
149  template <Affixing_Mode m>
150  auto strip_sfx_then_pfx_2(const Suffix& se, std::string& s,
151  Hidden_Homonym skip_hidden_homonym) const
153 
154  template <Affixing_Mode m = FULL_WORD>
155  auto strip_prefix_then_suffix_commutative(
156  std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
158 
159  template <Affixing_Mode m = FULL_WORD>
160  auto strip_pfx_then_sfx_comm_2(const Prefix& pe, std::string& word,
161  Hidden_Homonym skip_hidden_homonym) const
163 
164  template <Affixing_Mode m = FULL_WORD>
165  auto
166  strip_suffix_then_suffix(std::string& s,
167  Hidden_Homonym skip_hidden_homonym = {}) const
169 
170  template <Affixing_Mode m>
171  auto strip_sfx_then_sfx_2(const Suffix& se1, std::string& s,
172  Hidden_Homonym skip_hidden_homonym) const
174 
175  template <Affixing_Mode m = FULL_WORD>
176  auto
177  strip_prefix_then_prefix(std::string& s,
178  Hidden_Homonym skip_hidden_homonym = {}) const
180 
181  template <Affixing_Mode m>
182  auto strip_pfx_then_pfx_2(const Prefix& pe1, std::string& s,
183  Hidden_Homonym skip_hidden_homonym) const
185 
186  template <Affixing_Mode m = FULL_WORD>
187  auto strip_prefix_then_2_suffixes(
188  std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
190 
191  template <Affixing_Mode m>
192  auto strip_pfx_2_sfx_3(const Prefix& pe1, const Suffix& se1,
193  std::string& s,
194  Hidden_Homonym skip_hidden_homonym) const
196 
197  template <Affixing_Mode m = FULL_WORD>
198  auto strip_suffix_prefix_suffix(
199  std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
201 
202  template <Affixing_Mode m>
203  auto strip_s_p_s_3(const Suffix& se1, const Prefix& pe1,
204  std::string& word,
205  Hidden_Homonym skip_hidden_homonym) const
207 
208  template <Affixing_Mode m = FULL_WORD>
209  auto strip_2_suffixes_then_prefix(
210  std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
212 
213  template <Affixing_Mode m>
214  auto strip_2_sfx_pfx_3(const Suffix& se1, const Suffix& se2,
215  std::string& word,
216  Hidden_Homonym skip_hidden_homonym) const
218 
219  template <Affixing_Mode m = FULL_WORD>
220  auto strip_suffix_then_2_prefixes(
221  std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
223 
224  template <Affixing_Mode m>
225  auto strip_sfx_2_pfx_3(const Suffix& se1, const Prefix& pe1,
226  std::string& s,
227  Hidden_Homonym skip_hidden_homonym) const
229 
230  template <Affixing_Mode m = FULL_WORD>
231  auto strip_prefix_suffix_prefix(
232  std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
234 
235  template <Affixing_Mode m>
236  auto strip_p_s_p_3(const Prefix& pe1, const Suffix& se1,
237  std::string& word,
238  Hidden_Homonym skip_hidden_homonym) const
240 
241  template <Affixing_Mode m = FULL_WORD>
242  auto strip_2_prefixes_then_suffix(
243  std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
245 
246  template <Affixing_Mode m>
247  auto strip_2_pfx_sfx_3(const Prefix& pe1, const Prefix& pe2,
248  std::string& word,
249  Hidden_Homonym skip_hidden_homonym) const
251 
 // Compound-word checking. The templated overloads default the mode to
 // AT_COMPOUND_BEGIN.
252  auto check_compound(std::string& word,
253  Forceucase allow_bad_forceucase) const
255 
256  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
257  auto check_compound(std::string& word, size_t start_pos,
258  size_t num_part, std::string& part,
259  Forceucase allow_bad_forceucase) const
261 
262  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
263  auto check_compound_classic(std::string& word, size_t start_pos,
264  size_t i, size_t num_part,
265  std::string& part,
266  Forceucase allow_bad_forceucase) const
268 
269  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
270  auto check_compound_with_pattern_replacements(
271  std::string& word, size_t start_pos, size_t i, size_t num_part,
272  std::string& part, Forceucase allow_bad_forceucase) const
274 
275  template <Affixing_Mode m>
276  auto check_word_in_compound(std::string& s) const -> Compounding_Result;
277 
 // Helpers whose return types match the num_words_modifier and
 // num_syllable_modifier fields of Compounding_Result.
278  auto calc_num_words_modifier(const Prefix& pfx) const -> unsigned char;
279 
280  template <Affixing_Mode m>
281  auto calc_syllable_modifier(Word_List::const_reference we) const
282  -> signed char;
283 
284  template <Affixing_Mode m>
285  auto calc_syllable_modifier(Word_List::const_reference we,
286  const Suffix& sfx) const -> signed char;
287 
288  auto count_syllables(std::string_view word) const -> size_t;
289 
 // NOTE(review): the trailing return type of check_compound_with_rules is
 // also not visible in this listing.
290  auto check_compound_with_rules(std::string& word,
291  std::vector<const Flag_Set*>& words_data,
292  size_t start_pos, std::string& part,
293  Forceucase allow_bad_forceucase) const
294 
296  auto is_rep_similar(std::string& word) const -> bool;
297 };
298 
299 template <Affixing_Mode m>
300 auto Checker::affix_NOT_valid(const Prefix& e) const
301 {
302  if (m == FULL_WORD && e.cont_flags.contains(compound_onlyin_flag))
303  return true;
304  if (m == AT_COMPOUND_END &&
305  !e.cont_flags.contains(compound_permit_flag))
306  return true;
307  if (m != FULL_WORD && e.cont_flags.contains(compound_forbid_flag))
308  return true;
309  return false;
310 }
311 template <Affixing_Mode m>
312 auto Checker::affix_NOT_valid(const Suffix& e) const
313 {
314  if (m == FULL_WORD && e.cont_flags.contains(compound_onlyin_flag))
315  return true;
316  if (m == AT_COMPOUND_BEGIN &&
317  !e.cont_flags.contains(compound_permit_flag))
318  return true;
319  if (m != FULL_WORD && e.cont_flags.contains(compound_forbid_flag))
320  return true;
321  return false;
322 }
323 template <Affixing_Mode m, class AffixT>
324 auto Checker::outer_affix_NOT_valid(const AffixT& e) const
325 {
326  if (affix_NOT_valid<m>(e))
327  return true;
328  if (e.cont_flags.contains(need_affix_flag))
329  return true;
330  return false;
331 }
332 template <class AffixT>
333 auto Checker::is_circumfix(const AffixT& a) const
334 {
335  return a.cont_flags.contains(circumfix_flag);
336 }
337 
/**
 * @brief Checks whether an inner affix allows a given outer affix to be
 * applied on top of it.
 *
 * @param inner affix whose continuation flags (`cont_flags`) are searched
 * @param outer affix whose own `flag` is looked up
 * @return the result of `inner.cont_flags.contains(outer.flag)`
 */
template <class AffixInner, class AffixOuter>
auto cross_valid_inner_outer(const AffixInner& inner, const AffixOuter& outer)
{
	return inner.cont_flags.contains(outer.flag);
}
343 
344 template <class Affix>
345 auto cross_valid_inner_outer(const Flag_Set& word_flags, const Affix& afx)
346 {
347  return word_flags.contains(afx.flag);
348 }
349 
350 } // namespace v5
351 } // namespace nuspell
352 #endif // NUSPELL_CHECKER_HXX
nuspell::v5::Checker
Definition: checker.hxx:82
nuspell
Library main namespace.
Definition: aff_data.cxx:31
nuspell::v5::Affixing_Result
Definition: checker.hxx:43
nuspell::v5::Compounding_Result::affixed_and_modified
bool affixed_and_modified
Definition: checker.hxx:76
nuspell::v5::String_Set< char16_t >
nuspell::v5::Compounding_Result
Definition: checker.hxx:72
nuspell::v5::Affixing_Result_Base
Definition: checker.hxx:34
nuspell::v5::Prefix
Definition: structures.hxx:803
nuspell::v5::Aff_Data
Definition: aff_data.hxx:88
nuspell::v5::Suffix
Definition: structures.hxx:839