Nuspell
spellchecker
dictionary.hxx
Go to the documentation of this file.
1 /* Copyright 2016-2020 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
24 #ifndef NUSPELL_DICTIONARY_HXX
25 #define NUSPELL_DICTIONARY_HXX
26 
27 #include "aff_data.hxx"
28 
29 #include <locale>
30 
31 NUSPELL_MSVC_PRAGMA_WARNING(push)
32 NUSPELL_MSVC_PRAGMA_WARNING(disable : 4251 4275)
33 
34 namespace nuspell {
35 inline namespace v4 {
36 
/**
 * Tag enum used as the non-type template parameter `m` of the Dict_Base
 * member templates (strip_*, check_compound*, check_word_in_compound,
 * calc_syllable_modifier, ...). The strip_* members default it to FULL_WORD,
 * the check_compound* members default it to AT_COMPOUND_BEGIN.
 * NOTE(review): the per-enumerator notes are read off the names - confirm.
 */
37 enum Affixing_Mode {
38  FULL_WORD, // presumably: the word is checked on its own, not as a compound part
39  AT_COMPOUND_BEGIN, // presumably: the part is the first member of a compound
40  AT_COMPOUND_END, // presumably: the part is the last member of a compound
41  AT_COMPOUND_MIDDLE // presumably: the part is an inner member of a compound
42 };
43 
/**
 * Common base of every Affixing_Result specialization. Wraps a pointer to a
 * Word_List entry and exposes it through pointer-like operators.
 *
 * Must stay an aggregate (no constructors): the derived classes initialize it
 * with `Affixing_Result_Base{&r}`.
 */
44 struct Affixing_Result_Base {
45  Word_List::const_pointer root_word = {}; // value-initialized; presumably null means "nothing found" - confirm
46 
47  operator Word_List::const_pointer() const { return root_word; } // implicit conversion to the wrapped pointer
48  auto& operator*() const { return *root_word; } // no null check: root_word must be set before dereferencing
49  auto operator->() const { return root_word; }
50 };
51 
/**
 * Primary template: a word-list entry plus pointers to two further objects
 * of types T1 and T2.
 * NOTE(review): presumably T1/T2 are the Prefix/Suffix entries matched by the
 * Dict_Base::strip_* members - their return types are not visible here.
 *
 * Only addresses are stored; the referenced objects must outlive the result.
 */
52 template <class T1 = void, class T2 = void>
53 struct Affixing_Result : Affixing_Result_Base {
54  const T1* a = {};
55  const T2* b = {};
56 
57  Affixing_Result() = default; // leaves root_word, a and b value-initialized
58  Affixing_Result(Word_List::const_reference r, const T1& a, const T2& b)
59  : Affixing_Result_Base{&r}, a{&a}, b{&b} // the parameters shadow the members; `a{&a}` stores the argument's address
60  {
61  }
62 };
/**
 * Partial specialization for a single extra object (T2 = void): a word-list
 * entry plus a pointer to one object of type T1.
 *
 * Only the address of `a` is stored; the referenced object must outlive the
 * result.
 */
63 template <class T1>
64 struct Affixing_Result<T1, void> : Affixing_Result_Base {
65  const T1* a = {};
66 
67  Affixing_Result() = default; // leaves root_word and a value-initialized
68  Affixing_Result(Word_List::const_reference r, const T1& a)
69  : Affixing_Result_Base{&r}, a{&a} // the parameter shadows the member; stores the argument's address
70  {
71  }
72 };
73 
/**
 * Full specialization with no extra objects: carries only the word-list
 * entry pointer inherited from Affixing_Result_Base.
 */
74 template <>
75 struct Affixing_Result<void, void> : Affixing_Result_Base {
76  Affixing_Result() = default; // leaves root_word value-initialized
77  Affixing_Result(Word_List::const_reference r) : Affixing_Result_Base{&r} // not explicit: a word-list entry converts implicitly
78  {
79  }
80 };
81 
/**
 * A word-list entry pointer plus per-part modifiers, with the same
 * pointer-like operators as Affixing_Result_Base.
 * NOTE(review): presumably the return type of the check_compound* members of
 * Dict_Base - their return types are not visible in this listing.
 */
82 struct Compounding_Result {
83  Word_List::const_pointer word_entry = {};
84  unsigned char num_words_modifier = {}; // same type as the return of Dict_Base::calc_num_words_modifier
85  signed char num_syllable_modifier = {}; // same type as the return of Dict_Base::calc_syllable_modifier
86  bool affixed_and_modified = {}; // NOTE(review): `= {}` assumed to match the sibling members - confirm against the real header
87  operator Word_List::const_pointer() const { return word_entry; } // implicit conversion to the wrapped pointer
88  auto& operator*() const { return *word_entry; } // no null check: word_entry must be set before dereferencing
89  auto operator->() const { return word_entry; }
90 };
91 
92 struct NUSPELL_EXPORT Dict_Base : public Aff_Data {
93 
94  enum Forceucase : bool { // bool-backed type of the `allow_bad_forceucase` parameters (check_word, check_compound*)
95  FORBID_BAD_FORCEUCASE = false, // the value a `= {}` default argument produces
96  ALLOW_BAD_FORCEUCASE = true
97  };
98 
99  enum Hidden_Homonym : bool { // bool-backed type of the `skip_hidden_homonym` parameters (check_word, check_simple_word, strip_*)
100  ACCEPT_HIDDEN_HOMONYM = false, // the value a `= {}` default argument produces
101  SKIP_HIDDEN_HOMONYM = true
102  };
103 
104  enum High_Quality_Sugs : bool { // bool-backed return type of suggest_low()
105  ALL_LOW_QUALITY_SUGS = false,
106  HAS_HIGH_QUALITY_SUGS = true
107  };
108 
109  auto spell_priv(std::wstring& s) const -> bool;
110  auto spell_break(std::wstring& s, size_t depth = 0) const -> bool;
111  auto spell_casing(std::wstring& s) const -> const Flag_Set*;
112  auto spell_casing_upper(std::wstring& s) const -> const Flag_Set*;
113  auto spell_casing_title(std::wstring& s) const -> const Flag_Set*;
114  auto spell_sharps(std::wstring& base, size_t n_pos = 0, size_t n = 0,
115  size_t rep = 0) const -> const Flag_Set*;
116 
117  auto check_word(std::wstring& s, Forceucase allow_bad_forceucase = {},
118  Hidden_Homonym skip_hidden_homonym = {}) const
119  -> const Flag_Set*;
120  auto check_simple_word(std::wstring& word,
121  Hidden_Homonym skip_hidden_homonym = {}) const
122  -> const Flag_Set*;
123 
124  template <Affixing_Mode m>
125  auto affix_NOT_valid(const Prefix<wchar_t>& a) const;
126  template <Affixing_Mode m>
127  auto affix_NOT_valid(const Suffix<wchar_t>& a) const;
128  template <Affixing_Mode m, class AffixT>
129  auto outer_affix_NOT_valid(const AffixT& a) const;
130  template <class AffixT>
131  auto is_circumfix(const AffixT& a) const;
132  template <Affixing_Mode m>
133  auto is_valid_inside_compound(const Flag_Set& flags) const;
134 
135  template <Affixing_Mode m = FULL_WORD>
136  auto strip_prefix_only(std::wstring& s,
137  Hidden_Homonym skip_hidden_homonym = {}) const
139 
140  template <Affixing_Mode m = FULL_WORD>
141  auto strip_suffix_only(std::wstring& s,
142  Hidden_Homonym skip_hidden_homonym = {}) const
144 
145  template <Affixing_Mode m = FULL_WORD>
146  auto
147  strip_prefix_then_suffix(std::wstring& s,
148  Hidden_Homonym skip_hidden_homonym = {}) const
150 
151  template <Affixing_Mode m>
152  auto strip_pfx_then_sfx_2(const Prefix<wchar_t>& pe, std::wstring& s,
153  Hidden_Homonym skip_hidden_homonym) const
155 
156  template <Affixing_Mode m = FULL_WORD>
157  auto
158  strip_suffix_then_prefix(std::wstring& s,
159  Hidden_Homonym skip_hidden_homonym = {}) const
161 
162  template <Affixing_Mode m>
163  auto strip_sfx_then_pfx_2(const Suffix<wchar_t>& se, std::wstring& s,
164  Hidden_Homonym skip_hidden_homonym) const
166 
167  template <Affixing_Mode m = FULL_WORD>
168  auto strip_prefix_then_suffix_commutative(
169  std::wstring& word, Hidden_Homonym skip_hidden_homonym = {}) const
171 
172  template <Affixing_Mode m = FULL_WORD>
173  auto strip_pfx_then_sfx_comm_2(const Prefix<wchar_t>& pe,
174  std::wstring& word,
175  Hidden_Homonym skip_hidden_homonym) const
177 
178  template <Affixing_Mode m = FULL_WORD>
179  auto
180  strip_suffix_then_suffix(std::wstring& s,
181  Hidden_Homonym skip_hidden_homonym = {}) const
183 
184  template <Affixing_Mode m>
185  auto strip_sfx_then_sfx_2(const Suffix<wchar_t>& se1, std::wstring& s,
186  Hidden_Homonym skip_hidden_homonym) const
188 
189  template <Affixing_Mode m = FULL_WORD>
190  auto
191  strip_prefix_then_prefix(std::wstring& s,
192  Hidden_Homonym skip_hidden_homonym = {}) const
194 
195  template <Affixing_Mode m>
196  auto strip_pfx_then_pfx_2(const Prefix<wchar_t>& pe1, std::wstring& s,
197  Hidden_Homonym skip_hidden_homonym) const
199 
200  template <Affixing_Mode m = FULL_WORD>
201  auto strip_prefix_then_2_suffixes(
202  std::wstring& s, Hidden_Homonym skip_hidden_homonym = {}) const
204 
205  template <Affixing_Mode m>
206  auto strip_pfx_2_sfx_3(const Prefix<wchar_t>& pe1,
207  const Suffix<wchar_t>& se1, std::wstring& s,
208  Hidden_Homonym skip_hidden_homonym) const
210 
211  template <Affixing_Mode m = FULL_WORD>
212  auto strip_suffix_prefix_suffix(
213  std::wstring& s, Hidden_Homonym skip_hidden_homonym = {}) const
215 
216  template <Affixing_Mode m>
217  auto strip_s_p_s_3(const Suffix<wchar_t>& se1,
218  const Prefix<wchar_t>& pe1, std::wstring& word,
219  Hidden_Homonym skip_hidden_homonym) const
221 
222  template <Affixing_Mode m = FULL_WORD>
223  auto strip_2_suffixes_then_prefix(
224  std::wstring& s, Hidden_Homonym skip_hidden_homonym = {}) const
226 
227  template <Affixing_Mode m>
228  auto strip_2_sfx_pfx_3(const Suffix<wchar_t>& se1,
229  const Suffix<wchar_t>& se2, std::wstring& word,
230  Hidden_Homonym skip_hidden_homonym) const
232 
233  template <Affixing_Mode m = FULL_WORD>
234  auto strip_suffix_then_2_prefixes(
235  std::wstring& s, Hidden_Homonym skip_hidden_homonym = {}) const
237 
238  template <Affixing_Mode m>
239  auto strip_sfx_2_pfx_3(const Suffix<wchar_t>& se1,
240  const Prefix<wchar_t>& pe1, std::wstring& s,
241  Hidden_Homonym skip_hidden_homonym) const
243 
244  template <Affixing_Mode m = FULL_WORD>
245  auto strip_prefix_suffix_prefix(
246  std::wstring& word, Hidden_Homonym skip_hidden_homonym = {}) const
248 
249  template <Affixing_Mode m>
250  auto strip_p_s_p_3(const Prefix<wchar_t>& pe1,
251  const Suffix<wchar_t>& se1, std::wstring& word,
252  Hidden_Homonym skip_hidden_homonym) const
254 
255  template <Affixing_Mode m = FULL_WORD>
256  auto strip_2_prefixes_then_suffix(
257  std::wstring& word, Hidden_Homonym skip_hidden_homonym = {}) const
259 
260  template <Affixing_Mode m>
261  auto strip_2_pfx_sfx_3(const Prefix<wchar_t>& pe1,
262  const Prefix<wchar_t>& pe2, std::wstring& word,
263  Hidden_Homonym skip_hidden_homonym) const
265 
266  auto check_compound(std::wstring& word,
267  Forceucase allow_bad_forceucase) const
269 
270  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
271  auto check_compound(std::wstring& word, size_t start_pos,
272  size_t num_part, std::wstring& part,
273  Forceucase allow_bad_forceucase) const
275 
276  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
277  auto check_compound_classic(std::wstring& word, size_t start_pos,
278  size_t i, size_t num_part,
279  std::wstring& part,
280  Forceucase allow_bad_forceucase) const
282 
283  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
284  auto check_compound_with_pattern_replacements(
285  std::wstring& word, size_t start_pos, size_t i, size_t num_part,
286  std::wstring& part, Forceucase allow_bad_forceucase) const
288 
289  template <Affixing_Mode m>
290  auto check_word_in_compound(std::wstring& s) const
292 
293  auto calc_num_words_modifier(const Prefix<wchar_t>& pfx) const
294  -> unsigned char;
295 
296  template <Affixing_Mode m>
297  auto calc_syllable_modifier(Word_List::const_reference we) const
298  -> signed char;
299 
300  template <Affixing_Mode m>
301  auto calc_syllable_modifier(Word_List::const_reference we,
302  const Suffix<wchar_t>& sfx) const
303  -> signed char;
304 
305  auto count_syllables(const std::wstring& word) const -> size_t;
306 
307  auto check_compound_with_rules(std::wstring& word,
308  std::vector<const Flag_Set*>& words_data,
309  size_t start_pos, std::wstring& part,
310  Forceucase allow_bad_forceucase) const
311 
313 
314  auto suggest_priv(std::wstring& word, List_WStrings& out) const -> void;
315 
316  auto suggest_low(std::wstring& word, List_WStrings& out) const
317  -> High_Quality_Sugs;
318 
319  auto add_sug_if_correct(std::wstring& word, List_WStrings& out) const
320  -> bool;
321 
322  auto uppercase_suggest(std::wstring& word, List_WStrings& out) const
323  -> void;
324 
325  auto rep_suggest(std::wstring& word, List_WStrings& out) const -> void;
326 
327  auto try_rep_suggestion(std::wstring& word, List_WStrings& out) const
328  -> void;
329 
330  auto is_rep_similar(std::wstring& word) const -> bool;
331 
332  auto map_suggest(std::wstring& word, List_WStrings& out,
333  size_t i = 0) const -> void;
334 
335  auto adjacent_swap_suggest(std::wstring& word, List_WStrings& out) const
336  -> void;
337 
338  auto distant_swap_suggest(std::wstring& word, List_WStrings& out) const
339  -> void;
340 
341  auto keyboard_suggest(std::wstring& word, List_WStrings& out) const
342  -> void;
343 
344  auto extra_char_suggest(std::wstring& word, List_WStrings& out) const
345  -> void;
346 
347  auto forgotten_char_suggest(std::wstring& word,
348  List_WStrings& out) const -> void;
349 
350  auto move_char_suggest(std::wstring& word, List_WStrings& out) const
351  -> void;
352 
353  auto bad_char_suggest(std::wstring& word, List_WStrings& out) const
354  -> void;
355 
356  auto doubled_two_chars_suggest(std::wstring& word,
357  List_WStrings& out) const -> void;
358 
359  auto two_words_suggest(std::wstring& word, List_WStrings& out) const
360  -> void;
361 
362  auto phonetic_suggest(std::wstring& word, List_WStrings& out) const
363  -> void;
364 
365  auto ngram_suggest(std::wstring& word, List_WStrings& out) const
366  -> void;
367 
368  auto expand_root_word_for_ngram(Word_List::const_reference root,
369  std::wstring_view wrong,
370  List_WStrings& expanded_list,
371  std::vector<bool>& cross_affix) const
372  -> void;
373 
374  public:
375  Dict_Base() // default constructor; its only job is the base initialization below
376  : Aff_Data() // we explicitly do value init so content is zeroed
377  {
378  }
379 };
380 
/**
 * The only important public exception.
 *
 * Adds no state of its own: it is a distinct type derived from
 * std::runtime_error.
 * NOTE(review): presumably thrown by the Dictionary::load_from_* factories -
 * no throw site is visible in this header.
 */
384 class NUSPELL_EXPORT Dictionary_Loading_Error : public std::runtime_error {
385  public:
386  using std::runtime_error::runtime_error; // inherit every std::runtime_error constructor
387 };
388 
/**
 * The only important public class.
 *
 * Derives privately from Dict_Base, so none of the Dict_Base members are part
 * of the public interface. Only the default constructor, the two static
 * loaders, the deprecated imbue functions, spell() and suggest() are public.
 * The public interface takes and returns narrow strings (std::string_view,
 * std::string), while the Dict_Base members operate on std::wstring.
 */
392 class NUSPELL_EXPORT Dictionary : private Dict_Base {
393  std::locale external_locale;
394  bool external_locale_known_utf8; // no in-class initializer: must be set by every constructor (defined elsewhere)
395 
396  Dictionary(std::istream& aff, std::istream& dic); // private; NOTE(review): presumably used by load_from_aff_dic - confirm
397  auto external_to_internal_encoding(std::string_view in,
398  std::wstring& wide_out) const
399  -> bool; // narrow -> wide, result written to wide_out; meaning of the bool is not visible here (presumably success)
400 
401  auto internal_to_external_encoding(std::wstring_view wide_in,
402  std::string& out) const -> bool; // wide -> narrow, result written to out
403 
404  public:
405  Dictionary();
406  auto static load_from_aff_dic(std::istream& aff, std::istream& dic)
407  -> Dictionary; // factory: returns the dictionary by value
408  auto static load_from_path(
409  const std::string& file_path_without_extension) -> Dictionary; // presumably the .aff/.dic extensions are appended - confirm
410  [[deprecated]] auto imbue(const std::locale& loc) -> void; // deprecated; no replacement is named in this header
411  [[deprecated]] auto imbue_utf8() -> void; // deprecated; no replacement is named in this header
412  auto spell(std::string_view word) const -> bool; // presumably true when `word` is spelled correctly - confirm
413  auto suggest(std::string_view word, std::vector<std::string>& out) const
414  -> void; // results are delivered through `out`; whether `out` is cleared first is not visible here
415 };
416 } // namespace v4
417 } // namespace nuspell
418 NUSPELL_MSVC_PRAGMA_WARNING(pop)
419 #endif // NUSPELL_DICTIONARY_HXX
nuspell::v4::String_Set< char16_t >
nuspell::v4::Affixing_Result_Base
Definition: dictionary.hxx:44
nuspell::v4::Dict_Base
Definition: dictionary.hxx:92
nuspell
Library main namespace.
Definition: aff_data.cxx:31
nuspell::v4::Compounding_Result
Definition: dictionary.hxx:82
nuspell::v4::Prefix
Definition: structures.hxx:850
nuspell::v4::Suffix
Definition: structures.hxx:889
nuspell::v4::Dictionary
The only important public class.
Definition: dictionary.hxx:392
nuspell::v4::Compounding_Result::affixed_and_modified
bool affixed_and_modified
Definition: dictionary.hxx:86
nuspell::v4::Affixing_Result
Definition: dictionary.hxx:53
nuspell::v4::Dictionary_Loading_Error
The only important public exception.
Definition: dictionary.hxx:384
nuspell::v4::Aff_Data
Definition: aff_data.hxx:104
nuspell::v4::List_Basic_Strings
Definition: structures.hxx:1655