nuspell 5.1.6
Nuspell spellchecking library
Loading...
Searching...
No Matches
aff_data.hxx
1/* Copyright 2016-2024 Dimitrij Mijoski
2 *
3 * This file is part of Nuspell.
4 *
5 * Nuspell is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * Nuspell is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef NUSPELL_AFF_DATA_HXX
20#define NUSPELL_AFF_DATA_HXX
21
22#include "nuspell_export.h"
23#include "structures.hxx"
24
25#include <iosfwd>
26#include <unicode/locid.h>
27
28namespace nuspell {
29NUSPELL_BEGIN_INLINE_NAMESPACE
30
31class Encoding {
32 std::string name;
33
34 NUSPELL_EXPORT auto normalize_name() -> void;
35
36 public:
37 enum Enc_Type { SINGLEBYTE = false, UTF8 = true };
38
39 Encoding() = default;
40 explicit Encoding(const std::string& e) : name(e) { normalize_name(); }
41 explicit Encoding(std::string&& e) : name(std::move(e))
42 {
43 normalize_name();
44 }
45 explicit Encoding(const char* e) : name(e) { normalize_name(); }
46 auto& operator=(const std::string& e)
47 {
48 name = e;
49 normalize_name();
50 return *this;
51 }
52 auto& operator=(std::string&& e)
53 {
54 name = std::move(e);
55 normalize_name();
56 return *this;
57 }
58 auto& operator=(const char* e)
59 {
60 name = e;
61 normalize_name();
62 return *this;
63 }
64 auto empty() const { return name.empty(); }
65 auto& value() const { return name; }
66 auto is_utf8() const { return name == "UTF-8"; }
67 auto value_or_default() const -> std::string
68 {
69 if (name.empty())
70 return "ISO8859-1";
71 else
72 return name;
73 }
74 operator Enc_Type() const { return is_utf8() ? UTF8 : SINGLEBYTE; }
75};
76
/// Value type of Aff_Data::flag_type, which that struct lists among the
/// data members used only while parsing.
enum class Flag_Type {
    SINGLE_CHAR,
    DOUBLE_CHAR,
    NUMBER,
    UTF8
};
78
// Word_List is Hash_Multimap instantiated with std::string and Flag_Set; it is
// the type of Aff_Data::words. (Presumably the string is the key and the
// Flag_Set the mapped value -- confirm against Hash_Multimap's declaration.)
89using Word_List = Hash_Multimap<std::string, Flag_Set>;
90
91struct Aff_Data {
92 static constexpr char16_t HIDDEN_HOMONYM_FLAG = -1;
93 static constexpr size_t MAX_SUGGESTIONS = 16;
94
95 // spell checking options
96 Word_List words = {};
97 Prefix_Table prefixes = {};
98 Suffix_Table suffixes = {};
99
100 bool complex_prefixes = {};
101 bool fullstrip = {};
102 bool checksharps = {};
103 bool forbid_warn = {};
104 char16_t compound_onlyin_flag = {};
105 char16_t circumfix_flag = {};
106 char16_t forbiddenword_flag = {};
107 char16_t keepcase_flag = {};
108 char16_t need_affix_flag = {};
109 char16_t warn_flag = {};
110
111 // compounding options
112 char16_t compound_flag = {};
113 char16_t compound_begin_flag = {};
114 char16_t compound_last_flag = {};
115 char16_t compound_middle_flag = {};
116 Compound_Rule_Table compound_rules = {};
117
118 // spell checking options
119 Break_Table break_table = {};
120 Substr_Replacer input_substr_replacer = {};
121 std::string ignored_chars = {};
122 icu::Locale icu_locale = {};
123 Substr_Replacer output_substr_replacer = {};
124
125 // suggestion options
126 Replacement_Table replacements = {};
127 std::vector<Similarity_Group> similarities = {};
128 std::string keyboard_closeness = {};
129 std::string try_chars = {};
130 // Phonetic_Table phonetic_table = {};
131
132 char16_t nosuggest_flag = {};
133 char16_t substandard_flag = {};
134 unsigned short max_compound_suggestions = {};
135 unsigned short max_ngram_suggestions = {};
136 unsigned short max_diff_factor = {};
137 bool only_max_diff = {};
138 bool no_split_suggestions = {};
139 bool suggest_with_dots = {};
140
141 // compounding options
142 unsigned short compound_min_length = {};
143 unsigned short compound_max_word_count = {};
144 char16_t compound_permit_flag = {};
145 char16_t compound_forbid_flag = {};
146 char16_t compound_root_flag = {};
147 char16_t compound_force_uppercase = {};
148 bool compound_more_suffixes = {};
149 bool compound_check_duplicate = {};
150 bool compound_check_rep = {};
151 bool compound_check_case = {};
152 bool compound_check_triple = {};
153 bool compound_simplified_triple = {};
154 bool compound_syllable_num = {};
155 unsigned short compound_syllable_max = {};
156 std::string compound_syllable_vowels = {};
157 std::vector<Compound_Pattern> compound_patterns = {};
158
159 // data members used only while parsing
160 Flag_Type flag_type = {};
161 Encoding encoding = {};
162 std::vector<Flag_Set> flag_aliases = {};
163 std::string wordchars = {}; // deprecated?
164
165 auto parse_aff(std::istream& in, std::ostream& err_msg) -> bool;
166 auto parse_dic(std::istream& in, std::ostream& err_msg) -> bool;
167 auto parse_aff_dic(std::istream& aff, std::istream& dic,
168 std::ostream& err_msg)
169 {
170 if (parse_aff(aff, err_msg))
171 return parse_dic(dic, err_msg);
172 return false;
173 }
174};
175NUSPELL_END_INLINE_NAMESPACE
176} // namespace nuspell
177#endif // NUSPELL_AFF_DATA_HXX
Library main namespace.
Definition aff_data.cxx:33