Nuspell
spellchecker
dictionary.hxx
Go to the documentation of this file.
/* Copyright 2016-2018 Dimitrij Mijoski
 *
 * This file is part of Nuspell.
 *
 * Nuspell is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Nuspell is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
 */
18 
24 #ifndef NUSPELL_DICTIONARY_HXX
25 #define NUSPELL_DICTIONARY_HXX
26 
27 #include "aff_data.hxx"
28 
29 namespace nuspell {
30 
/**
 * Mode selector passed as a non-type template argument to the affix and
 * compound routines declared in Dict_Base.
 *
 * It is deliberately an unscoped enum: the enumerators are used unqualified
 * as default template arguments (e.g. `Affixing_Mode m = FULL_WORD`).
 * The enumerators keep their implicit values 0..3 in declaration order.
 */
enum Affixing_Mode {
    FULL_WORD,          // presumably: the input is a complete, stand-alone word
    AT_COMPOUND_BEGIN,  // presumably: the input is the first part of a compound
    AT_COMPOUND_END,    // presumably: the input is the last part of a compound
    AT_COMPOUND_MIDDLE  // presumably: the input is an inner part of a compound
};
37 
38 template <class... Affixes>
40  : public std::tuple<Word_List::const_pointer, const Affixes*...> {
41  using base_type =
42  std::tuple<Word_List::const_pointer, const Affixes*...>;
43  Affixing_Result() = default;
44  Affixing_Result(Word_List::const_reference word_entry,
45  const Affixes&... affixes)
46  : base_type(&word_entry, &affixes...)
47  {
48  }
49  // operator bool() const { return std::get<0>(*this); }
50  operator Word_List::const_pointer() const { return std::get<0>(*this); }
51  auto& operator*() const { return *std::get<0>(*this); }
52  auto operator-> () const { return std::get<0>(*this); }
53 };
54 
56  Word_List::const_pointer word_entry = {};
57  bool affixed_and_modified = {};
58  operator Word_List::const_pointer() const { return word_entry; }
59  auto& operator*() const { return *word_entry; }
60  auto operator-> () const { return word_entry; }
61 };
62 
63 struct Dict_Base : public Aff_Data {
64 
65  template <class CharT>
66  auto spell_priv(std::basic_string<CharT>& s) const -> bool;
67  template <class CharT>
68  auto spell_break(std::basic_string<CharT>& s, size_t depth = 0) const
69  -> bool;
70  template <class CharT>
71  auto spell_casing(std::basic_string<CharT>& s) const -> const Flag_Set*;
72  template <class CharT>
73  auto spell_casing_upper(std::basic_string<CharT>& s) const
74  -> const Flag_Set*;
75  template <class CharT>
76  auto spell_casing_title(std::basic_string<CharT>& s) const
77  -> const Flag_Set*;
78  template <class CharT>
79  auto spell_sharps(std::basic_string<CharT>& base, size_t n_pos = 0,
80  size_t n = 0, size_t rep = 0) const
81  -> const Flag_Set*;
82  template <class CharT>
83  auto check_word(std::basic_string<CharT>& s) const -> const Flag_Set*;
84 
85  template <Affixing_Mode m, class CharT>
86  auto affix_NOT_valid(const Prefix<CharT>& a) const;
87  template <Affixing_Mode m, class CharT>
88  auto affix_NOT_valid(const Suffix<CharT>& a) const;
89  template <Affixing_Mode m, class AffixT>
90  auto outer_affix_NOT_valid(const AffixT& a) const;
91  template <class AffixT>
92  auto is_circumfix(const AffixT& a) const;
93  template <Affixing_Mode m>
94  auto is_valid_inside_compound(const Flag_Set& flags) const;
95 
101  template <Affixing_Mode m = FULL_WORD, class CharT>
102  auto strip_prefix_only(std::basic_string<CharT>& s) const
104 
110  template <Affixing_Mode m = FULL_WORD, class CharT>
111  auto strip_suffix_only(std::basic_string<CharT>& s) const
113 
123  template <Affixing_Mode m = FULL_WORD, class CharT>
124  auto strip_prefix_then_suffix(std::basic_string<CharT>& s) const
126 
127  template <Affixing_Mode m, class CharT>
128  auto strip_pfx_then_sfx_2(const Prefix<CharT>& pe,
129  std::basic_string<CharT>& s) const
130  -> Affixing_Result<Suffix<CharT>, Prefix<CharT>>;
131 
141  template <Affixing_Mode m = FULL_WORD, class CharT>
142  auto strip_suffix_then_prefix(std::basic_string<CharT>& s) const
144 
145  template <Affixing_Mode m, class CharT>
146  auto strip_sfx_then_pfx_2(const Suffix<CharT>& se,
147  std::basic_string<CharT>& s) const
148  -> Affixing_Result<Prefix<CharT>, Suffix<CharT>>;
149 
150  template <Affixing_Mode m = FULL_WORD, class CharT>
151  auto strip_prefix_then_suffix_commutative(
152  std::basic_string<CharT>& word) const
153  -> Affixing_Result<Suffix<CharT>, Prefix<CharT>>;
154 
155  template <Affixing_Mode m = FULL_WORD, class CharT>
156  auto strip_pfx_then_sfx_comm_2(const Prefix<CharT>& pe,
157  std::basic_string<CharT>& word) const
158  -> Affixing_Result<Suffix<CharT>, Prefix<CharT>>;
159 
160  template <Affixing_Mode m = FULL_WORD, class CharT>
161  auto strip_suffix_then_suffix(std::basic_string<CharT>& s) const
162  -> Affixing_Result<Suffix<CharT>, Suffix<CharT>>;
163 
164  template <Affixing_Mode m, class CharT>
165  auto strip_sfx_then_sfx_2(const Suffix<CharT>& se1,
166  std::basic_string<CharT>& s) const
167  -> Affixing_Result<Suffix<CharT>, Suffix<CharT>>;
168 
169  template <Affixing_Mode m = FULL_WORD, class CharT>
170  auto strip_prefix_then_prefix(std::basic_string<CharT>& s) const
171  -> Affixing_Result<Prefix<CharT>, Prefix<CharT>>;
172 
173  template <Affixing_Mode m, class CharT>
174  auto strip_pfx_then_pfx_2(const Prefix<CharT>& pe1,
175  std::basic_string<CharT>& s) const
176  -> Affixing_Result<Prefix<CharT>, Prefix<CharT>>;
177 
178  template <Affixing_Mode m = FULL_WORD, class CharT>
179  auto strip_prefix_then_2_suffixes(std::basic_string<CharT>& s) const
181 
182  template <Affixing_Mode m, class CharT>
183  auto strip_pfx_2_sfx_3(const Prefix<CharT>& pe1,
184  const Suffix<CharT>& se1,
185  std::basic_string<CharT>& s) const
187 
188  template <Affixing_Mode m = FULL_WORD, class CharT>
189  auto strip_suffix_prefix_suffix(std::basic_string<CharT>& s) const
191 
192  template <Affixing_Mode m, class CharT>
193  auto strip_s_p_s_3(const Suffix<CharT>& se1, const Prefix<CharT>& pe1,
194  std::basic_string<CharT>& word) const
196 
197  template <Affixing_Mode m = FULL_WORD, class CharT>
198  auto strip_2_suffixes_then_prefix(std::basic_string<CharT>& s) const
200 
201  template <Affixing_Mode m, class CharT>
202  auto strip_2_sfx_pfx_3(const Suffix<CharT>& se1,
203  const Suffix<CharT>& se2,
204  std::basic_string<CharT>& word) const
206 
207  template <Affixing_Mode m = FULL_WORD, class CharT>
208  auto strip_suffix_then_2_prefixes(std::basic_string<CharT>& s) const
210 
211  template <Affixing_Mode m, class CharT>
212  auto strip_sfx_2_pfx_3(const Suffix<CharT>& se1,
213  const Prefix<CharT>& pe1,
214  std::basic_string<CharT>& s) const
216 
217  template <Affixing_Mode m = FULL_WORD, class CharT>
218  auto strip_prefix_suffix_prefix(std::basic_string<CharT>& word) const
220 
221  template <Affixing_Mode m, class CharT>
222  auto strip_p_s_p_3(const Prefix<CharT>& pe1, const Suffix<CharT>& se1,
223  std::basic_string<CharT>& word) const
225 
226  template <Affixing_Mode m = FULL_WORD, class CharT>
227  auto strip_2_prefixes_then_suffix(std::basic_string<CharT>& word) const
229  template <Affixing_Mode m, class CharT>
230  auto strip_2_pfx_sfx_3(const Prefix<CharT>& pe1,
231  const Prefix<CharT>& pe2,
232  std::basic_string<CharT>& word) const
234 
235  template <class CharT>
236  auto check_compound(std::basic_string<CharT>& word) const
238 
239  template <Affixing_Mode m = AT_COMPOUND_BEGIN, class CharT>
240  auto check_compound(std::basic_string<CharT>& word, size_t start_pos,
241  size_t num_part,
242  std::basic_string<CharT>& part) const
244 
245  template <Affixing_Mode m = AT_COMPOUND_BEGIN, class CharT>
246  auto check_compound_classic(std::basic_string<CharT>& word,
247  size_t start_pos, size_t i, size_t num_part,
248  std::basic_string<CharT>& part) const
250 
251  template <Affixing_Mode m = AT_COMPOUND_BEGIN, class CharT>
252  auto check_compound_with_pattern_replacements(
253  std::basic_string<CharT>& word, size_t start_pos, size_t i,
254  size_t num_part, std::basic_string<CharT>& part) const
256 
257  template <class CharT>
258  auto check_compound_with_rules(std::basic_string<CharT>& word,
259  std::vector<const Flag_Set*>& words_data,
260  size_t start_pos,
261  std::basic_string<CharT>& part) const
263 
264  template <Affixing_Mode m, class CharT>
265  auto check_word_in_compound(std::basic_string<CharT>& s) const
267 
268  template <class CharT>
269  auto suggest_priv(std::basic_string<CharT>& word,
270  List_Basic_Strings<CharT>& out) const -> void;
271 
272  template <class CharT>
273  auto add_sug_if_correct(std::basic_string<CharT>& word,
274  List_Basic_Strings<CharT>& out) const -> bool;
275 
276  template <class CharT>
277  auto try_rep_suggestion(std::basic_string<CharT>& word,
278  List_Basic_Strings<CharT>& out) const -> void;
279 
280  template <class CharT>
281  auto rep_suggest(std::basic_string<CharT>& word,
282  List_Basic_Strings<CharT>& out) const -> void;
283 
284  template <class CharT>
285  auto extra_char_suggest(std::basic_string<CharT>& word,
286  List_Basic_Strings<CharT>& out) const -> void;
287 
288  template <class CharT>
289  auto map_suggest(std::basic_string<CharT>& word,
290  List_Basic_Strings<CharT>& out, size_t i = 0) const
291  -> void;
292 
293  template <class CharT>
294  auto keyboard_suggest(std::basic_string<CharT>& word,
295  List_Basic_Strings<CharT>& out) const -> void;
296 
297  template <class CharT>
298  auto bad_char_suggest(std::basic_string<CharT>& word,
299  List_Basic_Strings<CharT>& out) const -> void;
300 
301  template <class CharT>
302  auto forgotten_char_suggest(std::basic_string<CharT>& word,
303  List_Basic_Strings<CharT>& out) const
304  -> void;
305 
306  template <class CharT>
307  auto phonetic_suggest(std::basic_string<CharT>& word,
308  List_Basic_Strings<CharT>& out) const -> void;
309 
310  public:
311  Dict_Base()
312  : Aff_Data() // we explicity do value init so content is zeroed
313  {
314  }
315 };
316 
320 inline namespace v2 {
321 
325 class Dictionary : private Dict_Base {
326  std::locale external_locale;
327  Encoding_Details enc_details;
328 
329  Dictionary(std::istream& aff, std::istream& dic);
330  auto external_to_internal_encoding(const std::string& in,
331  std::wstring& wide_out,
332  std::string& narrow_out) const
333  -> bool;
334 
335  auto internal_to_external_encoding(std::string& in_out,
336  std::wstring& wide_in_out) const
337  -> bool;
338 
339  public:
340  Dictionary();
341  auto static load_from_aff_dic(std::istream& aff, std::istream& dic)
342  -> Dictionary;
343  auto static load_from_path(
344  const std::string& file_path_without_extension) -> Dictionary;
345  auto imbue(const std::locale& loc) -> void;
346  auto spell(const std::string& word) const -> bool;
347  auto suggest(const std::string& word, List_Strings& out) const -> void;
348 };
349 } // namespace v2
350 } // namespace nuspell
351 #endif // NUSPELL_DICTIONARY_HXX
Vector of strings that recycles erased strings.
Definition: structures.hxx:908
Definition: dictionary.hxx:39
Definition: structures.hxx:636
Library main namespace.
Definition: aff_data.cxx:74
Affixing data structures, private header.
Definition: dictionary.hxx:63
Definition: dictionary.hxx:55
The only important public class.
Definition: dictionary.hxx:325
Definition: aff_data.hxx:96
Definition: structures.hxx:684