Nuspell
spellchecker
aff_data.hxx
1 /* Copyright 2016-2020 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef NUSPELL_AFF_DATA_HXX
20 #define NUSPELL_AFF_DATA_HXX
21 
22 #include "nuspell_export.h"
23 #include "structures.hxx"
24 
25 #include <iosfwd>
26 #include <unicode/locid.h>
27 
28 #ifdef _MSC_VER
29 #define NUSPELL_MSVC_PRAGMA_WARNING(x) __pragma(warning(x))
30 #else
31 #define NUSPELL_MSVC_PRAGMA_WARNING(x)
32 #endif
33 NUSPELL_MSVC_PRAGMA_WARNING(push)
34 NUSPELL_MSVC_PRAGMA_WARNING(disable : 4251)
35 
36 namespace nuspell {
37 inline namespace v4 {
38 
39 class Encoding {
40  std::string name;
41 
42  NUSPELL_EXPORT auto normalize_name() -> void;
43 
44  public:
45  enum Enc_Type { SINGLEBYTE = false, UTF8 = true };
46 
47  Encoding() = default;
48  explicit Encoding(const std::string& e) : name(e) { normalize_name(); }
49  explicit Encoding(std::string&& e) : name(move(e)) { normalize_name(); }
50  explicit Encoding(const char* e) : name(e) { normalize_name(); }
51  auto& operator=(const std::string& e)
52  {
53  name = e;
54  normalize_name();
55  return *this;
56  }
57  auto& operator=(std::string&& e)
58  {
59  name = move(e);
60  normalize_name();
61  return *this;
62  }
63  auto& operator=(const char* e)
64  {
65  name = e;
66  normalize_name();
67  return *this;
68  }
69  auto empty() const { return name.empty(); }
70  auto& value() const { return name; }
71  auto is_utf8() const { return name == "UTF-8"; }
72  auto value_or_default() const -> std::string
73  {
74  if (name.empty())
75  return "ISO8859-1";
76  else
77  return name;
78  }
79  operator Enc_Type() const { return is_utf8() ? UTF8 : SINGLEBYTE; }
80 };
81 
/**
 * @brief Selects one of four flag kinds; stored in Aff_Data::flag_type.
 *
 * NOTE(review): the enumerator names suggest the different ways flags can be
 * written in the parsed files - confirm against the parser.
 */
enum class Flag_Type {
	SINGLE_CHAR,
	DOUBLE_CHAR,
	NUMBER,
	UTF8
};
83 
85  auto& operator()(const std::pair<std::wstring, Flag_Set>& p) const
86  {
87  return p.first;
88  }
89 };
90 
103 
104 struct NUSPELL_EXPORT Aff_Data {
105  static constexpr auto HIDDEN_HOMONYM_FLAG = char16_t(-1);
106  static constexpr auto MAX_SUGGESTIONS = size_t(16);
107 
108  // spell checking options
109  Word_List words;
110  Prefix_Table prefixes;
111  Suffix_Table suffixes;
112 
113  bool complex_prefixes;
114  bool fullstrip;
115  bool checksharps;
116  bool forbid_warn;
117  char16_t compound_onlyin_flag;
118  char16_t circumfix_flag;
119  char16_t forbiddenword_flag;
120  char16_t keepcase_flag;
121  char16_t need_affix_flag;
122  char16_t warn_flag;
123 
124  // compounding options
125  char16_t compound_flag;
126  char16_t compound_begin_flag;
127  char16_t compound_last_flag;
128  char16_t compound_middle_flag;
129  Compound_Rule_Table compound_rules;
130 
131  // spell checking options
132  Break_Table<wchar_t> break_table;
133  Substr_Replacer<wchar_t> input_substr_replacer;
134  std::wstring ignored_chars;
135  icu::Locale icu_locale;
136  Substr_Replacer<wchar_t> output_substr_replacer;
137 
138  // suggestion options
139  Replacement_Table<wchar_t> replacements;
140  std::vector<Similarity_Group<wchar_t>> similarities;
141  std::wstring keyboard_closeness;
142  std::wstring try_chars;
143  Phonetic_Table<wchar_t> phonetic_table;
144 
145  char16_t nosuggest_flag;
146  char16_t substandard_flag;
147  unsigned short max_compound_suggestions;
148  unsigned short max_ngram_suggestions;
149  unsigned short max_diff_factor;
150  bool only_max_diff;
151  bool no_split_suggestions;
152  bool suggest_with_dots;
153 
154  // compounding options
155  unsigned short compound_min_length;
156  unsigned short compound_max_word_count;
157  char16_t compound_permit_flag;
158  char16_t compound_forbid_flag;
159  char16_t compound_root_flag;
160  char16_t compound_force_uppercase;
161  bool compound_more_suffixes;
162  bool compound_check_duplicate;
163  bool compound_check_rep;
164  bool compound_check_case;
165  bool compound_check_triple;
166  bool compound_simplified_triple;
167  bool compound_syllable_num;
168  unsigned short compound_syllable_max;
169  std::wstring compound_syllable_vowels;
170  std::vector<Compound_Pattern<wchar_t>> compound_patterns;
171 
172  // data members used only while parsing
173  Flag_Type flag_type;
174  Encoding encoding;
175  std::vector<Flag_Set> flag_aliases;
176  std::string wordchars; // deprecated?
177 
178  auto parse_aff(std::istream& in) -> bool;
179  auto parse_dic(std::istream& in) -> bool;
180  auto parse_aff_dic(std::istream& aff, std::istream& dic)
181  {
182  if (parse_aff(aff))
183  return parse_dic(dic);
184  return false;
185  }
186 };
187 } // namespace v4
188 } // namespace nuspell
189 NUSPELL_MSVC_PRAGMA_WARNING(pop)
190 #endif // NUSPELL_AFF_DATA_HXX
nuspell::v4::Prefix_Table
Definition: structures.hxx:1284
nuspell::v4::Replacement_Table< wchar_t >
nuspell::v4::Substr_Replacer< wchar_t >
nuspell
Library main namespace.
Definition: aff_data.cxx:31
nuspell::v4::Break_Table< wchar_t >
nuspell::v4::Encoding
Definition: aff_data.hxx:39
nuspell::v4::Extractor_First_of_Word_Pair
Definition: aff_data.hxx:84
nuspell::v4::Phonetic_Table< wchar_t >
nuspell::v4::Hash_Multiset< std::pair< std::wstring, Flag_Set >, std::wstring, Extractor_First_of_Word_Pair >
nuspell::v4::Compound_Rule_Table
Definition: structures.hxx:1443
nuspell::v4::Suffix_Table
Definition: structures.hxx:1336
nuspell::v4::Aff_Data
Definition: aff_data.hxx:104