Nuspell
spellchecker
dictionary.hxx
Go to the documentation of this file.
1 /* Copyright 2016-2018 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
24 #ifndef NUSPELL_DICTIONARY_HXX
25 #define NUSPELL_DICTIONARY_HXX
26 
27 #include "aff_data.hxx"
28 
29 namespace nuspell {
30 
/**
 * @brief Selects the context in which affix stripping / word checking runs.
 *
 * Used as a non-type template parameter (`template <Affixing_Mode m>`) by the
 * stripping and compound-checking members of Dict_Base. The plain stripping
 * functions default to FULL_WORD, the compound checks default to
 * AT_COMPOUND_BEGIN.
 *
 * Kept as an unscoped enum on purpose: the enumerators are referenced
 * unqualified in default template arguments (e.g. `m = FULL_WORD`).
 */
enum Affixing_Mode {
	FULL_WORD,          ///< presumably: the word is checked on its own
	AT_COMPOUND_BEGIN,  ///< presumably: first part of a compound word
	AT_COMPOUND_END,    ///< presumably: last part of a compound word
	AT_COMPOUND_MIDDLE  ///< presumably: inner part of a compound word
};
37 
38 template <class... Affixes>
40  : public std::tuple<Word_List::const_pointer, const Affixes*...> {
41  using base_type =
42  std::tuple<Word_List::const_pointer, const Affixes*...>;
43  Affixing_Result() = default;
44  Affixing_Result(Word_List::const_reference word_entry,
45  const Affixes&... affixes)
46  : base_type(&word_entry, &affixes...)
47  {
48  }
49  // operator bool() const { return std::get<0>(*this); }
50  operator Word_List::const_pointer() const { return std::get<0>(*this); }
51  auto& operator*() const { return *std::get<0>(*this); }
52  auto operator-> () const { return std::get<0>(*this); }
53 };
54 
56  Word_List::const_pointer word_entry = {};
57  bool affixed_and_modified = {};
58  operator Word_List::const_pointer() const { return word_entry; }
59  auto& operator*() const { return *word_entry; }
60  auto operator-> () const { return word_entry; }
61 };
62 
63 struct Dict_Base : public Aff_Data {
64 
65  auto spell_priv(std::wstring& s) const -> bool;
66  auto spell_break(std::wstring& s, size_t depth = 0) const -> bool;
67  auto spell_casing(std::wstring& s) const -> const Flag_Set*;
68  auto spell_casing_upper(std::wstring& s) const -> const Flag_Set*;
69  auto spell_casing_title(std::wstring& s) const -> const Flag_Set*;
70  auto spell_sharps(std::wstring& base, size_t n_pos = 0, size_t n = 0,
71  size_t rep = 0) const -> const Flag_Set*;
72 
73  auto check_word(std::wstring& s) const -> const Flag_Set*;
74 
75  template <Affixing_Mode m>
76  auto affix_NOT_valid(const Prefix<wchar_t>& a) const;
77  template <Affixing_Mode m>
78  auto affix_NOT_valid(const Suffix<wchar_t>& a) const;
79  template <Affixing_Mode m, class AffixT>
80  auto outer_affix_NOT_valid(const AffixT& a) const;
81  template <class AffixT>
82  auto is_circumfix(const AffixT& a) const;
83  template <Affixing_Mode m>
84  auto is_valid_inside_compound(const Flag_Set& flags) const;
85 
91  template <Affixing_Mode m = FULL_WORD>
92  auto strip_prefix_only(std::wstring& s) const
94 
100  template <Affixing_Mode m = FULL_WORD>
101  auto strip_suffix_only(std::wstring& s) const
103 
113  template <Affixing_Mode m = FULL_WORD>
114  auto strip_prefix_then_suffix(std::wstring& s) const
116 
117  template <Affixing_Mode m>
118  auto strip_pfx_then_sfx_2(const Prefix<wchar_t>& pe,
119  std::wstring& s) const
120  -> Affixing_Result<Suffix<wchar_t>, Prefix<wchar_t>>;
121 
131  template <Affixing_Mode m = FULL_WORD>
132  auto strip_suffix_then_prefix(std::wstring& s) const
134 
135  template <Affixing_Mode m>
136  auto strip_sfx_then_pfx_2(const Suffix<wchar_t>& se,
137  std::wstring& s) const
138  -> Affixing_Result<Prefix<wchar_t>, Suffix<wchar_t>>;
139 
140  template <Affixing_Mode m = FULL_WORD>
141  auto strip_prefix_then_suffix_commutative(std::wstring& word) const
142  -> Affixing_Result<Suffix<wchar_t>, Prefix<wchar_t>>;
143 
144  template <Affixing_Mode m = FULL_WORD>
145  auto strip_pfx_then_sfx_comm_2(const Prefix<wchar_t>& pe,
146  std::wstring& word) const
147  -> Affixing_Result<Suffix<wchar_t>, Prefix<wchar_t>>;
148 
149  template <Affixing_Mode m = FULL_WORD>
150  auto strip_suffix_then_suffix(std::wstring& s) const
151  -> Affixing_Result<Suffix<wchar_t>, Suffix<wchar_t>>;
152 
153  template <Affixing_Mode m>
154  auto strip_sfx_then_sfx_2(const Suffix<wchar_t>& se1,
155  std::wstring& s) const
156  -> Affixing_Result<Suffix<wchar_t>, Suffix<wchar_t>>;
157 
158  template <Affixing_Mode m = FULL_WORD>
159  auto strip_prefix_then_prefix(std::wstring& s) const
160  -> Affixing_Result<Prefix<wchar_t>, Prefix<wchar_t>>;
161 
162  template <Affixing_Mode m>
163  auto strip_pfx_then_pfx_2(const Prefix<wchar_t>& pe1,
164  std::wstring& s) const
165  -> Affixing_Result<Prefix<wchar_t>, Prefix<wchar_t>>;
166 
167  template <Affixing_Mode m = FULL_WORD>
168  auto strip_prefix_then_2_suffixes(std::wstring& s) const
170 
171  template <Affixing_Mode m>
172  auto strip_pfx_2_sfx_3(const Prefix<wchar_t>& pe1,
173  const Suffix<wchar_t>& se1,
174  std::wstring& s) const -> Affixing_Result<>;
175 
176  template <Affixing_Mode m = FULL_WORD>
177  auto strip_suffix_prefix_suffix(std::wstring& s) const
179 
180  template <Affixing_Mode m>
181  auto strip_s_p_s_3(const Suffix<wchar_t>& se1,
182  const Prefix<wchar_t>& pe1, std::wstring& word) const
184 
185  template <Affixing_Mode m = FULL_WORD>
186  auto strip_2_suffixes_then_prefix(std::wstring& s) const
188 
189  template <Affixing_Mode m>
190  auto strip_2_sfx_pfx_3(const Suffix<wchar_t>& se1,
191  const Suffix<wchar_t>& se2,
192  std::wstring& word) const -> Affixing_Result<>;
193 
194  template <Affixing_Mode m = FULL_WORD>
195  auto strip_suffix_then_2_prefixes(std::wstring& s) const
197 
198  template <Affixing_Mode m>
199  auto strip_sfx_2_pfx_3(const Suffix<wchar_t>& se1,
200  const Prefix<wchar_t>& pe1,
201  std::wstring& s) const -> Affixing_Result<>;
202 
203  template <Affixing_Mode m = FULL_WORD>
204  auto strip_prefix_suffix_prefix(std::wstring& word) const
206 
207  template <Affixing_Mode m>
208  auto strip_p_s_p_3(const Prefix<wchar_t>& pe1,
209  const Suffix<wchar_t>& se1, std::wstring& word) const
211 
212  template <Affixing_Mode m = FULL_WORD>
213  auto strip_2_prefixes_then_suffix(std::wstring& word) const
215  template <Affixing_Mode m>
216  auto strip_2_pfx_sfx_3(const Prefix<wchar_t>& pe1,
217  const Prefix<wchar_t>& pe2,
218  std::wstring& word) const -> Affixing_Result<>;
219 
220  auto check_compound(std::wstring& word) const -> Compounding_Result;
221 
222  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
223  auto check_compound(std::wstring& word, size_t start_pos,
224  size_t num_part, std::wstring& part) const
226 
227  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
228  auto check_compound_classic(std::wstring& word, size_t start_pos,
229  size_t i, size_t num_part,
230  std::wstring& part) const
232 
233  template <Affixing_Mode m = AT_COMPOUND_BEGIN>
234  auto check_compound_with_pattern_replacements(std::wstring& word,
235  size_t start_pos,
236  size_t i, size_t num_part,
237  std::wstring& part) const
239 
240  auto check_compound_with_rules(std::wstring& word,
241  std::vector<const Flag_Set*>& words_data,
242  size_t start_pos,
243  std::wstring& part) const
245 
246  template <Affixing_Mode m>
247  auto check_word_in_compound(std::wstring& s) const
249 
250  auto suggest_priv(std::wstring& word, List_WStrings& out) const -> void;
251 
252  auto add_sug_if_correct(std::wstring& word, List_WStrings& out) const
253  -> bool;
254 
255  auto try_rep_suggestion(std::wstring& word, List_WStrings& out) const
256  -> void;
257 
258  auto rep_suggest(std::wstring& word, List_WStrings& out) const -> void;
259 
260  auto extra_char_suggest(std::wstring& word, List_WStrings& out) const
261  -> void;
262 
263  auto map_suggest(std::wstring& word, List_WStrings& out,
264  size_t i = 0) const -> void;
265 
266  auto keyboard_suggest(std::wstring& word, List_WStrings& out) const
267  -> void;
268 
269  auto bad_char_suggest(std::wstring& word, List_WStrings& out) const
270  -> void;
271 
272  auto forgotten_char_suggest(std::wstring& word,
273  List_WStrings& out) const -> void;
274 
275  auto phonetic_suggest(std::wstring& word, List_WStrings& out) const
276  -> void;
277 
278  public:
279  Dict_Base()
280  : Aff_Data() // we explicity do value init so content is zeroed
281  {
282  }
283 };
284 
288 inline namespace v2 {
289 
/**
 * @brief Exception type for dictionary loading failures.
 *
 * Adds no state of its own: it inherits every constructor of
 * std::runtime_error, so it is built from a message string and can be caught
 * as std::runtime_error or std::exception.
 *
 * NOTE(review): presumably thrown by the Dictionary::load_from_* factories -
 * the throw sites are not in this header, confirm in the implementation.
 */
class Dictionary_Loading_Error : public std::runtime_error {
      public:
	using std::runtime_error::runtime_error;
};
294 
298 class Dictionary : private Dict_Base {
299  std::locale external_locale;
300  bool external_locale_known_utf8;
301 
302  Dictionary(std::istream& aff, std::istream& dic);
303  auto external_to_internal_encoding(const std::string& in,
304  std::wstring& wide_out) const
305  -> bool;
306 
307  auto internal_to_external_encoding(const std::wstring& wide_in,
308  std::string& out) const -> bool;
309 
310  public:
311  Dictionary();
312  auto static load_from_aff_dic(std::istream& aff, std::istream& dic)
313  -> Dictionary;
314  auto static load_from_path(
315  const std::string& file_path_without_extension) -> Dictionary;
316  auto imbue(const std::locale& loc) -> void;
317  auto spell(const std::string& word) const -> bool;
318  auto suggest(const std::string& word,
319  std::vector<std::string>& out) const -> void;
320 };
321 } // namespace v2
322 } // namespace nuspell
323 #endif // NUSPELL_DICTIONARY_HXX
Vector of strings that recycles erased strings.
Definition: structures.hxx:1128
Definition: dictionary.hxx:39
Definition: structures.hxx:837
Library main namespace.
Definition: aff_data.cxx:78
Affixing data structures, private header.
Definition: dictionary.hxx:63
Definition: dictionary.hxx:55
The only important public class.
Definition: dictionary.hxx:298
Definition: aff_data.hxx:62
Definition: structures.hxx:886
Definition: dictionary.hxx:290