Nuspell
spellchecker
Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
nuspell Namespace Reference

Library main namespace. More...

Namespaces

 v2
 The public API is an inline namespace.
 

Classes

struct  Aff_Data
 
struct  Affix
 
class  Affix_Table
 
struct  Affixing_Result
 
class  At_Scope_Exit
 
class  Break_Table
 
struct  Compound_Check_Pattern
 
struct  Compound_Pattern
 
class  Compound_Rule_Table
 
struct  Compounding_Result
 
class  Condition
 Limited regular expression matching used in affix entries. More...
 
struct  Dict_Base
 
struct  Directory
 
class  Encoding
 
class  Encoding_Converter
 
struct  Globber
 
class  Hash_Multiset
 
struct  identity
 
class  List_Basic_Strings
 Vector of strings that recycles erased strings. More...
 
class  Phonetic_Table
 
class  Prefix
 
class  Prefix_Iter
 Iterator of prefix entries that match a word. More...
 
class  Replacement_Table
 
class  Setlocale_To_C_In_Scope
 
struct  Similarity_Group
 
class  String_Pair
 
class  String_Set
 A Set class backed by a string. More...
 
class  Substr_Replacer
 
class  Suffix
 
class  Suffix_Iter
 Iterator of suffix entries that match a word. More...
 
class  To_Root_Unroot_RAII
 
class  Word_List
 Map between words and word_flags. More...
 

Typedefs

using Word_List_Base = Hash_Multiset< std::pair< std::string, Flag_Set >, string_view, member< std::pair< std::string, Flag_Set >, std::string, &std::pair< std::string, Flag_Set >::first > >
 
template<class CharT >
using my_string_view = NUSPELL_STR_VIEW_NS::basic_string_view< CharT >
 
using string_view = my_string_view< char >
 
using wstring_view = my_string_view< wchar_t >
 
using Flag_Set = String_Set< char16_t >
 
template<class CharT , class AffixT >
using Affix_Table_Base = Hash_Multiset< AffixT, my_string_view< CharT >, member< AffixT, std::basic_string< CharT >, &AffixT::appending > >
 
template<class CharT >
using Prefix_Table = Affix_Table< CharT, Prefix< CharT > >
 
template<class CharT >
using Suffix_Table = Affix_Table< CharT, Suffix< CharT > >
 
using List_Strings = List_Basic_Strings< char >
 
using List_WStrings = List_Basic_Strings< wchar_t >
 

Enumerations

enum  Flag_Parsing_Error {
  NONUTF8_FLAGS_ABOVE_127_WARNING = -1, NO_ERROR = 0, MISSING_FLAGS, UNPAIRED_LONG_FLAG,
  INVALID_NUMERIC_FLAG, INVALID_UTF8, FLAG_ABOVE_65535, INVALID_NUMERIC_ALIAS,
  COMPOUND_RULE_INVALID_FORMAT
}
 
enum  Flag_Type { Flag_Type::SINGLE_CHAR, Flag_Type::DOUBLE_CHAR, Flag_Type::NUMBER, Flag_Type::UTF8 }
 
enum  Affixing_Mode { FULL_WORD, AT_COMPOUND_BEGIN, AT_COMPOUND_END, AT_COMPOUND_MIDDLE }
 
enum  Casing {
  Casing::SMALL, Casing::INIT_CAPITAL, Casing::ALL_CAPITAL, Casing::CAMEL,
  Casing::PASCAL
}
 Casing type enum, ignoring neutral case characters. More...
 

Functions

void reset_failbit_istream (std::istream &in)
 
template<class T , class Func >
auto parse_vector_of_T (istream &in, size_t line_num, const string &command, unordered_map< string, int > &counts, vector< T > &vec, Func parseLineFunc) -> void
 Parses vector of class T from an input stream. More...
 
auto decode_flags (const string &s, Flag_Type t, const Encoding &enc, u16string &out) -> Flag_Parsing_Error
 
auto decode_flags_possible_alias (const string &s, Flag_Type t, const Encoding &enc, const vector< Flag_Set > &flag_aliases, u16string &out) -> Flag_Parsing_Error
 
auto report_flag_parsing_error (Flag_Parsing_Error err, size_t line_num)
 
auto decode_flags (istream &in, size_t line_num, Flag_Type t, const Encoding &enc, u16string &out) -> istream &
 Decodes flags. More...
 
auto decode_flags_possible_alias (istream &in, size_t line_num, Flag_Type t, const Encoding &enc, const vector< Flag_Set > &flag_aliases, u16string &out) -> istream &
 
auto decode_single_flag (istream &in, size_t line_num, Flag_Type t, const Encoding &enc) -> char16_t
 Decodes a single flag from an input stream. More...
 
auto parse_word_slash_flags (istream &in, size_t line_num, Flag_Type t, const Encoding &enc, const vector< Flag_Set > &flag_aliases, string &word, u16string &flags) -> istream &
 
auto parse_word_slash_single_flag (istream &in, size_t line_num, Flag_Type t, const Encoding &enc, string &word, char16_t &flag) -> istream &
 
auto parse_morhological_fields (istream &in, vector< string > &vecOut) -> void
 Parses morphological fields. More...
 
auto parse_affix (istream &in, size_t line_num, string &command, Flag_Type t, const Encoding &enc, const vector< Flag_Set > &flag_aliases, vector< Affix > &vec, unordered_map< string, pair< bool, int >> &cmd_affix) -> void
 Parses an affix from an input stream. More...
 
auto parse_flag_type (istream &in, size_t line_num, Flag_Type &flag_type) -> void
 Parses flag type. More...
 
auto parse_compound_rule (const string &s, Flag_Type t, const Encoding &enc, u16string &out) -> Flag_Parsing_Error
 
auto parse_compound_rule (istream &in, size_t line_num, Flag_Type t, const Encoding &enc, u16string &out) -> istream &
 
auto strip_utf8_bom (std::istream &in) -> void
 
auto dic_find_end_of_word_heuristics (const string &line)
 Scans line for morphological field [a-z][a-z]: More...
 
template<class AffixInner , class AffixOuter >
auto cross_valid_inner_outer (const AffixInner &inner, const AffixOuter &outer)
 
template<class Affix >
auto cross_valid_inner_outer (const Flag_Set &word_flags, const Affix &afx)
 
auto prefix (const wstring &word, size_t len)
 
auto prefix (wstring &&word, size_t len)=delete
 
auto suffix (const wstring &word, size_t len)
 
auto suffix (wstring &&word, size_t len)=delete
 
auto match_compound_pattern (const Compound_Pattern< wchar_t > &p, const wstring &word, size_t i, Compounding_Result first, Compounding_Result second)
 
auto is_compound_forbidden_by_patterns (const vector< Compound_Pattern< wchar_t >> &patterns, const wstring &word, size_t i, Compounding_Result first, Compounding_Result second)
 
template<class AffixT >
auto is_modiying_affix (const AffixT &a)
 
template<class OutIt >
auto get_default_search_paths (OutIt out) -> OutIt
 Gets the default search paths. More...
 
template<class OutIt >
auto get_mozilla_paths (OutIt out) -> OutIt
 Gets the Mozilla search paths. More...
 
template<class OutIt >
auto get_libreoffice_paths (OutIt out) -> OutIt
 Gets the LibreOffice search paths. More...
 
template<class OutIt >
auto get_openoffice_paths (OutIt out) -> OutIt
 Gets the Apache OpenOffice search paths. More...
 
template<class OutIt >
auto search_path_for_dicts (const string &dir, OutIt out) -> OutIt
 Searches directory for dictionaries. More...
 
auto validate_utf8 (const std::string &s) -> bool
 
template<class InChar , class OutContainer >
auto valid_utf_to_utf (const std::basic_string< InChar > &in, OutContainer &out) -> void
 
template<class InChar , class OutContainer >
auto utf_to_utf_my (const std::basic_string< InChar > &in, OutContainer &out) -> bool
 
auto wide_to_utf8 (const std::wstring &in, std::string &out) -> void
 
auto wide_to_utf8 (const std::wstring &in) -> std::string
 
auto wide_to_utf8 (const std::wstring &in, boost::container::small_vector_base< char > &out) -> void
 
auto utf8_to_wide (const std::string &in, std::wstring &out) -> bool
 
auto utf8_to_wide (const std::string &in) -> std::wstring
 
auto utf8_to_16 (const std::string &in) -> std::u16string
 
bool utf8_to_16 (const std::string &in, std::u16string &out)
 
auto is_ascii (char c) -> bool
 
auto is_all_ascii (const std::string &s) -> bool
 
template<class CharT >
auto widen_latin1 (char c) -> CharT
 
auto latin1_to_ucs2 (const std::string &s) -> std::u16string
 
auto latin1_to_ucs2 (const std::string &s, std::u16string &out) -> void
 
auto is_surrogate_pair (char16_t c) -> bool
 
auto is_all_bmp (const std::u16string &s) -> bool
 
auto to_wide (const std::string &in, const std::locale &loc, std::wstring &out) -> bool
 
auto to_wide (const std::string &in, const std::locale &loc) -> std::wstring
 
auto to_narrow (const std::wstring &in, std::string &out, const std::locale &loc) -> bool
 
auto to_narrow (const std::wstring &in, const std::locale &loc) -> std::string
 
auto is_locale_known_utf8 (const locale &loc) -> bool
 
auto wide_to_icu (const std::wstring &in, icu::UnicodeString &out) -> bool
 
auto icu_to_wide (const icu::UnicodeString &in, std::wstring &out) -> bool
 
auto to_upper (const std::wstring &in, const icu::Locale &loc) -> std::wstring
 
auto to_title (const std::wstring &in, const icu::Locale &loc) -> std::wstring
 
auto to_lower (const std::wstring &in, const icu::Locale &loc) -> std::wstring
 
auto classify_casing (const std::wstring &s) -> Casing
 Determines casing (capitalization) type for a word. More...
 
auto has_uppercase_at_compound_word_boundary (const std::wstring &word, size_t i) -> bool
 Check if word[i] or word[i-1] are uppercase. More...
 
auto is_locale_known_utf8 (const std::locale &loc) -> bool
 
template<class CharT >
auto constexpr literal_choose (const char *narrow, const wchar_t *wide)
 
template<>
auto constexpr literal_choose< char > (const char *narrow, const wchar_t *)
 
template<>
auto constexpr literal_choose< wchar_t > (const char *, const wchar_t *wide)
 
template<class CharT , class SepT , class OutIt >
auto split_on_any_of (const std::basic_string< CharT > &s, const SepT &sep, OutIt out)
 Splits string on set of single char separators. More...
 
template<class CharT , class OutIt >
auto split (const std::basic_string< CharT > &s, CharT sep, OutIt out)
 Splits string on single char separator. More...
 
template<class CharT , class OutIt >
auto split_on_whitespace (const std::basic_string< CharT > &s, OutIt out, const std::locale &loc=std::locale()) -> OutIt
 Splits on whitespace. More...
 
template<class CharT >
auto split_on_whitespace_v (const std::basic_string< CharT > &s, std::vector< std::basic_string< CharT >> &v, const std::locale &loc=std::locale()) -> void
 Splits on whitespace, outputs to vector of strings. More...
 
template<class CharT >
auto & erase_chars (std::basic_string< CharT > &s, const std::basic_string< CharT > &erase_chars)
 
template<class CharT >
auto & replace_char (std::basic_string< CharT > &s, CharT from, CharT to)
 
template<class CharT >
auto is_number (const std::basic_string< CharT > &s) -> bool
 Tests if word is a number. More...
 
template<class DataIter , class PatternIter , class FuncEq = std::equal_to<>>
auto match_simple_regex (DataIter data_first, DataIter data_last, PatternIter pat_first, PatternIter pat_last, FuncEq eq=FuncEq())
 
template<class DataRange , class PatternRange , class FuncEq = std::equal_to<>>
auto match_simple_regex (const DataRange &data, const PatternRange &pattern, FuncEq eq=FuncEq())
 
template<class CharT >
auto swap (String_Set< CharT > &a, String_Set< CharT > &b)
 
auto match_compund_rule (const std::vector< const Flag_Set *> &words_data, const std::u16string &pattern)
 
template<class CharT >
auto swap (List_Basic_Strings< CharT > &a, List_Basic_Strings< CharT > &b)
 

Variables

const auto PATHSEP = ':'
 
const auto DIRSEP = '/'
 
const auto SEPARATORS = '/'
 

Detailed Description

Library main namespace.

Enumeration Type Documentation

◆ Casing

enum nuspell::Casing
strong

Casing type enum, ignoring neutral case characters.

Enumerator
SMALL 

all lower case or neutral case, e.g.

"lowercase" or "123"

INIT_CAPITAL 

start upper case, rest lower case, e.g.

"Initcap"

ALL_CAPITAL 

all upper case, e.g.

"UPPERCASE" or "ALL4ONE"

CAMEL 

camel case, start lower case, e.g.

"camelCase"

PASCAL 

pascal case, start upper case, e.g.

"PascalCase"

◆ Flag_Type

enum nuspell::Flag_Type
strong
Enumerator
SINGLE_CHAR 

single-character flag, e.g.

for "a"

DOUBLE_CHAR 

double-character flag, e.g. for "aa"

NUMBER 

numerical flag, e.g.

for 61

UTF8 

UTF-8 flag, e.g.

for "á"

Function Documentation

◆ classify_casing()

auto nuspell::classify_casing ( const std::wstring &  s) -> Casing

Determines casing (capitalization) type for a word.

Casing is sometimes referred to as capitalization.

Parameters
s: word for which casing is determined.
Returns
The casing type.

◆ decode_flags()

auto nuspell::decode_flags ( istream &  in,
size_t  line_num,
Flag_Type  t,
const Encoding &  enc,
u16string &  out 
) -> istream&

Decodes flags.

Expects that there are flags in the stream. If there are no flags in the stream (e.g., the stream is at EOF) or if the format of the flags is incorrect, the stream failbit will be set.

◆ decode_single_flag()

auto nuspell::decode_single_flag ( istream &  in,
size_t  line_num,
Flag_Type  t,
const Encoding &  enc
) -> char16_t

Decodes a single flag from an input stream.

Parameters
in: input stream to decode from.
line_num
t
enc: encoding of the stream.
Returns
The value of the first decoded flag or 0 when no flag was decoded.

◆ dic_find_end_of_word_heuristics()

auto nuspell::dic_find_end_of_word_heuristics ( const string &  line)

Scans line for morphological field [a-z][a-z]:

Parameters
line
Returns
the end of the word before the morph field, or npos

◆ get_default_search_paths()

template<class OutIt >
auto nuspell::get_default_search_paths ( OutIt  out) -> OutIt

Gets the default search paths.

Parameters
out: Output iterator, begin of the output range.
Returns
End of the output range.

◆ get_libreoffice_paths()

template<class OutIt >
auto nuspell::get_libreoffice_paths ( OutIt  out) -> OutIt

Gets the LibreOffice search paths.

Parameters
out: Output iterator, begin of the output range.
Returns
End of the output range.

◆ get_mozilla_paths()

template<class OutIt >
auto nuspell::get_mozilla_paths ( OutIt  out) -> OutIt

Gets the Mozilla search paths.

Parameters
out: Output iterator, begin of the output range.
Returns
End of the output range.

◆ get_openoffice_paths()

template<class OutIt >
auto nuspell::get_openoffice_paths ( OutIt  out) -> OutIt

Gets the Apache OpenOffice search paths.

Parameters
out: Output iterator, begin of the output range.
Returns
End of the output range.

◆ has_uppercase_at_compound_word_boundary()

auto nuspell::has_uppercase_at_compound_word_boundary ( const std::wstring &  word,
size_t  i 
) -> bool

Check if word[i] or word[i-1] are uppercase.

Check if the two chars are alphabetic and at least one of them is in uppercase.

Parameters
word
i
loc
Returns
true if at least one is uppercase, false otherwise.

◆ is_number()

template<class CharT >
auto nuspell::is_number ( const std::basic_string< CharT > &  s) -> bool

Tests if word is a number.

Allows numbers with dots ".", dashes "-" and commas ",", but forbids double separators such as "..", "--" and ".,". This implementation increases performance over the regex implementation in the standard library.

◆ parse_affix()

auto nuspell::parse_affix ( istream &  in,
size_t  line_num,
string &  command,
Flag_Type  t,
const Encoding &  enc,
const vector< Flag_Set > &  flag_aliases,
vector< Affix > &  vec,
unordered_map< string, pair< bool, int >> &  cmd_affix 
) -> void

Parses an affix from an input stream.

Parameters
in: input stream to parse from.
line_num
[in,out] command
t
enc
flag_aliases
[in,out] vec
[in,out] cmd_affix

◆ parse_flag_type()

auto nuspell::parse_flag_type ( istream &  in,
size_t  line_num,
Flag_Type &  flag_type
) -> void

Parses flag type.

Parameters
in: input stream to parse from.
line_num
[out] flag_type

◆ parse_morhological_fields()

auto nuspell::parse_morhological_fields ( istream &  in,
vector< string > &  vecOut 
) -> void

Parses morphological fields.

Parameters
in: input stream to parse from.
[in,out] vecOut

◆ parse_vector_of_T()

template<class T , class Func >
auto nuspell::parse_vector_of_T ( istream &  in,
size_t  line_num,
const string &  command,
unordered_map< string, int > &  counts,
vector< T > &  vec,
Func  parseLineFunc 
) -> void

Parses vector of class T from an input stream.

Parameters
in: input stream to decode from.
line_num
command
[in,out] counts
[in,out] vec
parseLineFunc

◆ search_path_for_dicts()

template<class OutIt >
auto nuspell::search_path_for_dicts ( const string &  dir,
OutIt  out 
) -> OutIt

Searches directory for dictionaries.

Parameters
dir: directory path.
out: output iterator where the found dictionary names are appended.
Returns
end of the output range

◆ split()

template<class CharT , class OutIt >
auto nuspell::split ( const std::basic_string< CharT > &  s,
CharT  sep,
OutIt  out 
)

Splits string on single char separator.

Consecutive separators are treated as separate and will emit empty strings.

Parameters
s: string to split.
sep: char that acts as separator to split on.
out: start of the output range where separated strings are appended.
Returns
The iterator that indicates the end of the output range.

◆ split_on_any_of()

template<class CharT , class SepT , class OutIt >
auto nuspell::split_on_any_of ( const std::basic_string< CharT > &  s,
const SepT &  sep,
OutIt  out 
)

Splits string on set of single char separators.

Consecutive separators are treated as separate and will emit empty strings.

Parameters
s: string to split.
sep: separator(s) to split on.
out: start of the output range where separated strings are appended.
Returns
The end of the output range where separated strings are appended.

◆ split_on_whitespace()

template<class CharT , class OutIt >
auto nuspell::split_on_whitespace ( const std::basic_string< CharT > &  s,
OutIt  out,
const std::locale &  loc = std::locale() 
) -> OutIt

Splits on whitespace.

Consecutive whitespace is treated as a single separator. Behaves the same as Python's split called without a separator argument.

Parameters
s: string to split.
out: start of the output range where separated strings are appended.
loc: locale object that takes care of what is whitespace.
Returns
The iterator that indicates the end of the output range.

◆ split_on_whitespace_v()

template<class CharT >
auto nuspell::split_on_whitespace_v ( const std::basic_string< CharT > &  s,
std::vector< std::basic_string< CharT >> &  v,
const std::locale &  loc = std::locale() 
) -> void

Splits on whitespace, outputs to vector of strings.

See split_on_whitespace().

Parameters
s: string to split.
[out] v: vector with separated strings. The vector is first cleared.
loc: locale object that takes care of what is whitespace.