Nuspell
spellchecker
locale_utils.hxx
Go to the documentation of this file.
1 /* Copyright 2016-2018 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
24 #ifndef LOCALE_UTILS_HXX
25 #define LOCALE_UTILS_HXX
26 
27 #include <locale>
28 #include <string>
29 
30 namespace nuspell {
31 
// UTF-8, wide-string and UTF-32 conversion entry points.
//
// Only the declarations live in this header, so the remarks below describe
// the signatures. Anything about behaviour is inferred from the names and is
// marked as such -- confirm against the definitions.

// Takes a narrow string and returns a std::u32string. Presumably a second
// implementation of utf8_to_32() -- TODO confirm how the two differ.
auto utf8_to_32_alternative(const std::string& s) -> std::u32string;

// Returns a bool for the given narrow string; presumably true when `s` is
// well-formed UTF-8 -- TODO confirm.
auto validate_utf8(const std::string& s) -> bool;

// wide -> UTF-8 (per the name). One overload writes the result into `out`,
// the other returns it by value.
auto wide_to_utf8(const std::wstring& in, std::string& out) -> void;
auto wide_to_utf8(const std::wstring& in) -> std::string;

// UTF-8 -> wide (per the name). The out-parameter overload also returns a
// bool -- presumably a success flag, TODO confirm. The by-value overload has
// no separate status channel.
auto utf8_to_wide(const std::string& in, std::wstring& out) -> bool;
auto utf8_to_wide(const std::string& in) -> std::wstring;

// UTF-8 -> UTF-32 (per the name), returned by value.
auto utf8_to_32(const std::string& in) -> std::u32string;
42 
// ASCII, Latin-1 and BMP helpers.
//
// Only the declarations live in this header; behavioural remarks are
// inferred from the names and marked as such.

// Predicates over a single char and over a whole string. Presumably
// is_all_ascii() is is_ascii() applied to every element -- TODO confirm.
auto is_ascii(char c) -> bool;
auto is_all_ascii(const std::string& s) -> bool;

// Latin-1 -> UCS-2 (per the name). One overload returns the result by value,
// the other writes it into `out`.
auto latin1_to_ucs2(const std::string& s) -> std::u16string;
auto latin1_to_ucs2(const std::string& s, std::u16string& out) -> void;

// Predicates over a single UTF-32 code unit and over a whole UTF-32 string.
// "BMP" presumably means the Unicode Basic Multilingual Plane -- TODO confirm
// the exact range the definition tests.
auto is_bmp(char32_t c) -> bool;
auto is_all_bmp(const std::u32string& s) -> bool;

// UTF-32 -> UCS-2. Per the name, code points outside the BMP are skipped
// rather than reported -- TODO confirm. By-value and out-parameter overloads.
auto u32_to_ucs2_skip_non_bmp(const std::u32string& s) -> std::u16string;
auto u32_to_ucs2_skip_non_bmp(const std::u32string& s, std::u16string& out)
    -> void;
54 
// Locale-driven narrow <-> wide conversions.
//
// Only the declarations live in this header; behavioural remarks are
// inferred from the names and marked as such.

// Narrow -> wide using `inloc`. The out-parameter overload returns a bool
// (presumably a success flag -- TODO confirm); the other returns the wide
// string by value.
auto to_wide(const std::string& in, const std::locale& inloc, std::wstring& out)
    -> bool;
auto to_wide(const std::string& in, const std::locale& inloc) -> std::wstring;

// Wide -> narrow using `outloc`.
// NOTE: the out-parameter overload takes (in, out, locale), whereas the
// matching to_wide() overload takes (in, locale, out). The order is part of
// the public interface, so it is kept as is; take care at call sites.
auto to_narrow(const std::wstring& in, std::string& out,
               const std::locale& outloc) -> bool;
auto to_narrow(const std::wstring& in, const std::locale& outloc)
    -> std::string;

// Modifies the given locale in place. Per the name it installs ctype facets;
// the parameter name suggests a Boost.Locale-generated locale is expected --
// TODO confirm against the definition.
auto install_ctype_facets_inplace(std::locale& boost_loc) -> void;
64 
// Classification of an (external, internal) encoding pair.
//
// The enumerator names spell out the combination they stand for: whether the
// external and the internal side are UTF-8 ("U8") or something else
// ("OTHER"), plus one case for both sides being the same single-byte
// encoding. Underlying values are the implicit 0..4 in declaration order.
enum class Encoding_Details {
    EXTERNAL_U8_INTERNAL_U8,
    EXTERNAL_OTHER_INTERNAL_U8,
    EXTERNAL_U8_INTERNAL_OTHER,
    EXTERNAL_OTHER_INTERNAL_OTHER,
    EXTERNAL_SAME_INTERNAL_AND_SINGLEBYTE
};

// Takes the external and the internal locale and returns one of the
// Encoding_Details values. The definition is not in this header; how the
// encoding of each locale is determined cannot be seen from here.
auto analyze_encodings(const std::locale& external, const std::locale& internal)
    -> Encoding_Details;
75 
/// Casing type enum, ignoring neutral case characters.
enum class Casing {
    SMALL,        ///< all lower case or neutral case
    INIT_CAPITAL, ///< start upper case, rest lower case
    ALL_CAPITAL,  ///< all upper case
    CAMEL,        ///< camel case, start lower case
    PASCAL        ///< pascal case, start upper case
};

/// Determines casing (capitalization) type for a word.
///
/// @param s   the word to classify.
/// @param loc locale passed to the classification; when omitted, a
///            default-constructed std::locale is used.
/// @return the Casing value for @p s.
template <class CharT>
auto classify_casing(const std::basic_string<CharT>& s,
                     const std::locale& loc = std::locale()) -> Casing;
90 
/// Check if word[i] or word[i-1] are uppercase.
///
/// @param word the word to inspect.
/// @param i    index of the boundary; the description refers to word[i-1],
///             so presumably callers pass i >= 1 -- TODO confirm.
/// @param loc  locale passed to the uppercase test.
/// @return a bool with the result of the check.
template <class CharT>
auto has_uppercase_at_compound_word_boundary(
    const std::basic_string<CharT>& word, size_t i, const std::locale& loc)
    -> bool;
95 
/// Holds the name of a character encoding.
///
/// Every constructor and assignment that takes a name stores it and then
/// calls normalize_name(), which is defined outside this header. is_utf8()
/// compares against the exact string "UTF-8", so normalize_name() presumably
/// canonicalises the spelling -- TODO confirm against its definition.
class Encoding {
    std::string name;

    // Rewrites `name` in place; defined elsewhere.
    auto normalize_name() -> void;

  public:
    /// Two-way classification; the values are deliberately false/true so
    /// they line up with is_utf8().
    enum Enc_Type { SINGLEBYTE = false, UTF8 = true };

    /// Creates an Encoding with an empty name.
    Encoding() = default;

    // The converting constructors and assignments are intentionally
    // implicit, so an encoding name can be passed or assigned directly.
    Encoding(const std::string& e) : name(e) { normalize_name(); }
    Encoding(std::string&& e) : name(std::move(e)) { normalize_name(); }
    Encoding(const char* e) : name(e) { normalize_name(); }
    auto& operator=(const std::string& e)
    {
        name = e;
        normalize_name();
        return *this;
    }
    auto& operator=(std::string&& e)
    {
        name = std::move(e);
        normalize_name();
        return *this;
    }
    auto& operator=(const char* e)
    {
        name = e;
        normalize_name();
        return *this;
    }

    /// True when no encoding name is stored.
    auto empty() const { return name.empty(); }

    /// Implicit read-only access to the stored name.
    operator const std::string&() const { return name; }

    /// Read-only access to the stored name.
    auto& value() const { return name; }

    /// True when the stored name is exactly "UTF-8".
    auto is_utf8() const { return name == "UTF-8"; }

    /// Returns the stored name, or "ISO8859-1" when no name is stored.
    /// Const-qualified because it only reads `name`, so it can be called on
    /// const objects like the other accessors.
    auto value_or_default() const -> std::string
    {
        if (name.empty())
            return "ISO8859-1";
        else
            return name;
    }

    /// UTF8 when is_utf8() holds, SINGLEBYTE otherwise (including when the
    /// name is empty).
    operator Enc_Type() const { return is_utf8() ? UTF8 : SINGLEBYTE; }
};
139 } // namespace nuspell
140 #endif // LOCALE_UTILS_HXX
Definition: locale_utils.hxx:96
Casing
Casing type enum, ignoring neutral case characters.
Definition: locale_utils.hxx:79
start upper case, rest lower case, e.g. "Initcap"
all lower case or neutral case, e.g. "lowercase" or "123"
auto has_uppercase_at_compound_word_boundary(const std::basic_string< CharT > &word, size_t i, const std::locale &loc) -> bool
Check if word[i] or word[i-1] are uppercase.
Definition: locale_utils.cxx:880
camel case, start lower case, e.g. "camelCase"
auto classify_casing(const std::basic_string< CharT > &s, const std::locale &loc) -> Casing
Determines casing (capitalization) type for a word.
Definition: locale_utils.cxx:830
pascal case, start upper case, e.g. "PascalCase"
Library main namespace.
Definition: aff_data.cxx:74
all upper case, e.g. "UPPERCASE"