Nuspell
spell checker
aff_data.hxx
1 /* Copyright 2016-2023 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef NUSPELL_AFF_DATA_HXX
20 #define NUSPELL_AFF_DATA_HXX
21 
22 #include "nuspell_export.h"
23 #include "structures.hxx"
24 
25 #include <iosfwd>
26 #include <unicode/locid.h>
27 
28 namespace nuspell {
29 NUSPELL_BEGIN_INLINE_NAMESPACE
30 
31 class Encoding {
32  std::string name;
33 
34  NUSPELL_EXPORT auto normalize_name() -> void;
35 
36  public:
37  enum Enc_Type { SINGLEBYTE = false, UTF8 = true };
38 
39  Encoding() = default;
40  explicit Encoding(const std::string& e) : name(e) { normalize_name(); }
41  explicit Encoding(std::string&& e) : name(move(e)) { normalize_name(); }
42  explicit Encoding(const char* e) : name(e) { normalize_name(); }
43  auto& operator=(const std::string& e)
44  {
45  name = e;
46  normalize_name();
47  return *this;
48  }
49  auto& operator=(std::string&& e)
50  {
51  name = move(e);
52  normalize_name();
53  return *this;
54  }
55  auto& operator=(const char* e)
56  {
57  name = e;
58  normalize_name();
59  return *this;
60  }
61  auto empty() const { return name.empty(); }
62  auto& value() const { return name; }
63  auto is_utf8() const { return name == "UTF-8"; }
64  auto value_or_default() const -> std::string
65  {
66  if (name.empty())
67  return "ISO8859-1";
68  else
69  return name;
70  }
71  operator Enc_Type() const { return is_utf8() ? UTF8 : SINGLEBYTE; }
72 };
73 
// Selects one of four flag representations; stored in Aff_Data::flag_type.
// NOTE(review): the enumerator names suggest these mirror the flag encodings
// an affix file can declare -- confirm against the parser.
enum class Flag_Type {
    SINGLE_CHAR,
    DOUBLE_CHAR,
    NUMBER,
    UTF8,
};
75 
// Type of Aff_Data::words: a Hash_Multimap with std::string keys and Flag_Set
// values. NOTE(review): presumably each key is a dictionary word and the value
// is the set of flags attached to it -- confirm against the dictionary parser.
86 using Word_List = Hash_Multimap<std::string, Flag_Set>;
87 
88 struct Aff_Data {
89  static constexpr auto HIDDEN_HOMONYM_FLAG = char16_t(-1);
90  static constexpr auto MAX_SUGGESTIONS = size_t(16);
91 
92  // spell checking options
93  Word_List words;
94  Prefix_Table prefixes;
95  Suffix_Table suffixes;
96 
97  bool complex_prefixes;
98  bool fullstrip;
99  bool checksharps;
100  bool forbid_warn;
101  char16_t compound_onlyin_flag;
102  char16_t circumfix_flag;
103  char16_t forbiddenword_flag;
104  char16_t keepcase_flag;
105  char16_t need_affix_flag;
106  char16_t warn_flag;
107 
108  // compounding options
109  char16_t compound_flag;
110  char16_t compound_begin_flag;
111  char16_t compound_last_flag;
112  char16_t compound_middle_flag;
113  Compound_Rule_Table compound_rules;
114 
115  // spell checking options
116  Break_Table break_table;
117  Substr_Replacer input_substr_replacer;
118  std::string ignored_chars;
119  icu::Locale icu_locale;
120  Substr_Replacer output_substr_replacer;
121 
122  // suggestion options
123  Replacement_Table replacements;
124  std::vector<Similarity_Group> similarities;
125  std::string keyboard_closeness;
126  std::string try_chars;
127  // Phonetic_Table phonetic_table;
128 
129  char16_t nosuggest_flag;
130  char16_t substandard_flag;
131  unsigned short max_compound_suggestions;
132  unsigned short max_ngram_suggestions;
133  unsigned short max_diff_factor;
134  bool only_max_diff;
135  bool no_split_suggestions;
136  bool suggest_with_dots;
137 
138  // compounding options
139  unsigned short compound_min_length;
140  unsigned short compound_max_word_count;
141  char16_t compound_permit_flag;
142  char16_t compound_forbid_flag;
143  char16_t compound_root_flag;
144  char16_t compound_force_uppercase;
145  bool compound_more_suffixes;
146  bool compound_check_duplicate;
147  bool compound_check_rep;
148  bool compound_check_case;
149  bool compound_check_triple;
150  bool compound_simplified_triple;
151  bool compound_syllable_num;
152  unsigned short compound_syllable_max;
153  std::string compound_syllable_vowels;
154  std::vector<Compound_Pattern> compound_patterns;
155 
156  // data members used only while parsing
157  Flag_Type flag_type;
158  Encoding encoding;
159  std::vector<Flag_Set> flag_aliases;
160  std::string wordchars; // deprecated?
161 
162  auto parse_aff(std::istream& in, std::ostream& err_msg) -> bool;
163  auto parse_dic(std::istream& in, std::ostream& err_msg) -> bool;
164  auto parse_aff_dic(std::istream& aff, std::istream& dic,
165  std::ostream& err_msg)
166  {
167  if (parse_aff(aff, err_msg))
168  return parse_dic(dic, err_msg);
169  return false;
170  }
171 };
172 NUSPELL_END_INLINE_NAMESPACE
173 } // namespace nuspell
174 #endif // NUSPELL_AFF_DATA_HXX
Definition: structures.hxx:440
Definition: structures.hxx:1382
Definition: aff_data.hxx:31
Definition: structures.hxx:1227
Definition: structures.hxx:1520
Definition: structures.hxx:315
Definition: structures.hxx:1279
Library main namespace.
Definition: aff_data.cxx:33
Definition: aff_data.hxx:88