Nuspell
spellchecker
Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
nuspell Namespace Reference

Library main namespace. More...

Namespaces

 v2
 Public API is inline namespace.
 

Classes

struct  Aff_Data
 
struct  Aff_Structures
 
struct  Affix
 
class  Affix_Table
 
struct  Affixing_Result
 
class  At_Scope_Exit
 
class  Break_Table
 
struct  Compound_Check_Pattern
 
struct  Compound_Pattern
 
class  Compound_Rule_Table
 
struct  Compounding_Result
 
class  Condition
 Limited regular expression matching used in affix entries. More...
 
struct  Dict_Base
 
struct  Directory
 
class  Encoding
 
struct  Globber
 
class  Hash_Multiset
 
struct  identity
 
class  List_Basic_Strings
 Vector of strings that recycles erased strings. More...
 
class  my_ctype
 
class  my_ctype< char >
 
class  my_ctype< wchar_t >
 
struct  Out_Iter_One_Bool
 
class  Phonetic_Table
 
class  Prefix
 
class  Prefix_Iter
 Iterator of prefix entries that match a word. More...
 
class  Replacement_Table
 
struct  Similarity_Group
 
class  String_Pair
 
class  String_Set
 A Set class backed by a string. More...
 
class  Substr_Replacer
 
class  Suffix
 
class  Suffix_Iter
 Iterator of suffix entries that match a word. More...
 
struct  sv_eq
 
struct  sv_hash
 
class  To_Root_Unroot_RAII
 
class  Word_List
 Map between words and word_flags. More...
 

Typedefs

using Word_List_Base = Hash_Multiset< std::pair< std::string, Flag_Set >, std::string, member< std::pair< std::string, Flag_Set >, std::string, &std::pair< std::string, Flag_Set >::first > >
 
using Flag_Set = String_Set< char16_t >
 
template<class CharT >
using my_string_view = boost::basic_string_view< CharT >
 
template<class CharT , class AffixT >
using Affix_Table_Base = Hash_Multiset< AffixT, std::basic_string< CharT >, member< AffixT, std::basic_string< CharT >, &AffixT::appending >, sv_hash< CharT >, sv_eq< CharT > >
 
template<class CharT >
using Prefix_Table = Affix_Table< CharT, Prefix< CharT > >
 
template<class CharT >
using Suffix_Table = Affix_Table< CharT, Suffix< CharT > >
 
using List_WStrings = List_Basic_Strings< wchar_t >
 

Enumerations

enum  Flag_Type { FLAG_SINGLE_CHAR, FLAG_DOUBLE_CHAR, FLAG_NUMBER, FLAG_UTF8 }
 
enum  Affixing_Mode { FULL_WORD, AT_COMPOUND_BEGIN, AT_COMPOUND_END, AT_COMPOUND_MIDDLE }
 
enum  Encoding_Details {
  EXTERNAL_U8_INTERNAL_U8, EXTERNAL_OTHER_INTERNAL_U8, EXTERNAL_U8_INTERNAL_OTHER, EXTERNAL_OTHER_INTERNAL_OTHER,
  EXTERNAL_SAME_INTERNAL_AND_SINGLEBYTE
}
 
enum  Casing {
  Casing::SMALL, Casing::INIT_CAPITAL, Casing::ALL_CAPITAL, Casing::CAMEL,
  Casing::PASCAL
}
 Casing type enum, ignoring neutral case characters. More...
 

Functions

void reset_failbit_istream (std::istream &in)
 
bool read_to_slash_or_space (std::istream &in, std::string &out)
 
template<class T , class Func >
auto parse_vector_of_T (istream &in, size_t line_num, const string &command, unordered_map< string, int > &counts, vector< T > &vec, Func parseLineFunc) -> void
 Parses vector of class T from an input stream. More...
 
auto decode_flags (istream &in, size_t line_num, Flag_Type t, const Encoding &enc) -> u16string
 Decodes flags. More...
 
auto decode_single_flag (istream &in, size_t line_num, Flag_Type t, const Encoding &enc) -> char16_t
 Decodes a single flag from an input stream. More...
 
auto decode_flags_possible_alias (istream &in, size_t line_num, Flag_Type t, const Encoding &enc, const vector< Flag_Set > &flag_aliases) -> u16string
 
auto parse_morhological_fields (istream &in, vector< string > &vecOut) -> void
 Parses morphological fields. More...
 
auto parse_affix (istream &in, size_t line_num, string &command, Flag_Type t, const Encoding &enc, const vector< Flag_Set > &flag_aliases, vector< Affix > &vec, unordered_map< string, pair< bool, int >> &cmd_affix) -> void
 Parses an affix from an input stream. More...
 
auto parse_flag_type (istream &in, size_t line_num, Flag_Type &flag_type) -> void
 Parses flag type. More...
 
auto parse_compound_rule (istream &in, size_t line_num, Flag_Type t, const Encoding &enc, u16string &ret)
 
auto strip_bom (istream &in)
 
auto dic_find_end_of_word_heuristics (const string &line)
 Scans line for morphological field [a-z][a-z]: More...
 
template<class AffixInner , class AffixOuter >
auto cross_valid_inner_outer (const AffixInner &inner, const AffixOuter &outer)
 
template<class Affix >
auto cross_valid_inner_outer (const Flag_Set &word_flags, const Affix &afx)
 
template<class CharT >
auto prefix (const basic_string< CharT > &word, size_t len)
 
template<class CharT >
auto prefix (basic_string< CharT > &&word, size_t len)=delete
 
template<class CharT >
auto suffix (const basic_string< CharT > &word, size_t len)
 
template<class CharT >
auto suffix (basic_string< CharT > &&word, size_t len)=delete
 
template<class CharT >
auto match_compound_pattern (const Compound_Pattern< CharT > &p, const basic_string< CharT > &word, size_t i, Compounding_Result first, Compounding_Result second)
 
template<class CharT >
auto is_compound_forbidden_by_patterns (const vector< Compound_Pattern< CharT >> &patterns, const basic_string< CharT > &word, size_t i, Compounding_Result first, Compounding_Result second)
 
template<class AffixT >
auto is_modiying_affix (const AffixT &a)
 
template<class OutIt >
auto get_default_search_paths (OutIt out) -> OutIt
 Gets the default search paths. More...
 
template<class OutIt >
auto get_mozilla_paths (OutIt out) -> OutIt
 Gets the Mozilla search paths. More...
 
template<class OutIt >
auto get_libreoffice_paths (OutIt out) -> OutIt
 Gets the LibreOffice search paths. More...
 
template<class OutIt >
auto get_openoffice_paths (OutIt out) -> OutIt
 Gets the Apache OpenOffice search paths. More...
 
template<class OutIt >
auto search_path_for_dicts (const string &dir, OutIt out) -> OutIt
 Searches directory for dictionaries. More...
 
template<class InpIter , class OutIter >
auto decode_utf8 (InpIter first, InpIter last, OutIter out) -> OutIter
 
auto utf8_to_32_alternative (const std::string &s) -> std::u32string
 
auto validate_utf8 (const std::string &s) -> bool
 
template<class InChar , class OutChar >
auto valid_utf_to_utf (const std::basic_string< InChar > &in, std::basic_string< OutChar > &out) -> void
 
template<class InChar , class OutChar >
auto utf_to_utf_my (const std::basic_string< InChar > &in, std::basic_string< OutChar > &out) -> bool
 
auto wide_to_utf8 (const std::wstring &in, std::string &out) -> void
 
auto wide_to_utf8 (const std::wstring &in) -> std::string
 
auto utf8_to_wide (const std::string &in, std::wstring &out) -> bool
 
auto utf8_to_wide (const std::string &in) -> std::wstring
 
auto utf8_to_32 (const std::string &in) -> std::u32string
 
auto is_ascii (char c) -> bool
 
auto is_all_ascii (const std::string &s) -> bool
 
template<class CharT >
auto widen_latin1 (char c) -> CharT
 
auto latin1_to_ucs2 (const std::string &s) -> std::u16string
 
auto latin1_to_ucs2 (const std::string &s, std::u16string &out) -> void
 
auto is_bmp (char32_t c) -> bool
 
auto is_all_bmp (const std::u32string &s) -> bool
 
auto u32_to_ucs2_skip_non_bmp (const std::u32string &s) -> std::u16string
 
auto u32_to_ucs2_skip_non_bmp (const std::u32string &s, std::u16string &out) -> void
 
auto to_wide (const std::string &in, const std::locale &loc, std::wstring &out) -> bool
 
auto to_wide (const std::string &in, const std::locale &loc) -> std::wstring
 
auto to_narrow (const std::wstring &in, std::string &out, const std::locale &loc) -> bool
 
auto to_narrow (const std::wstring &in, const std::locale &loc) -> std::string
 
auto get_char_mask (UChar32 cp)
 
auto install_ctype_facets_inplace (std::locale &boost_loc) -> void
 
auto analyze_encodings (const locale &external, const locale &internal) -> Encoding_Details
 
template<class CharT >
auto classify_casing (const std::basic_string< CharT > &s, const std::locale &loc) -> Casing
 Determines casing (capitalization) type for a word. More...
 
template auto classify_casing (const string &, const locale &) -> Casing
 
template auto classify_casing (const wstring &, const locale &) -> Casing
 
template<class CharT >
auto has_uppercase_at_compound_word_boundary (const std::basic_string< CharT > &word, size_t i, const std::locale &loc) -> bool
 Check if word[i] or word[i-1] are uppercase. More...
 
template auto has_uppercase_at_compound_word_boundary (const string &, size_t, const locale &) -> bool
 
template auto has_uppercase_at_compound_word_boundary (const wstring &, size_t, const locale &) -> bool
 
auto analyze_encodings (const std::locale &external, const std::locale &internal) -> Encoding_Details
 
template<class CharT >
auto constexpr literal_choose (const char *narrow, const wchar_t *wide)
 
template<>
auto constexpr literal_choose< char > (const char *narrow, const wchar_t *)
 
template<>
auto constexpr literal_choose< wchar_t > (const char *, const wchar_t *wide)
 
template<class CharT , class SepT , class OutIt >
auto split_on_any_of (const std::basic_string< CharT > &s, const SepT &sep, OutIt out)
 Splits string on set of single char separators. More...
 
template<class CharT , class OutIt >
auto split (const std::basic_string< CharT > &s, CharT sep, OutIt out)
 Splits string on single char separator. More...
 
template<class CharT , class OutIt >
auto split (const std::basic_string< CharT > &s, const std::basic_string< CharT > &sep, OutIt out)
 Splits string on string separator. More...
 
template<class CharT , class OutIt >
auto split (const std::basic_string< CharT > &s, const CharT *sep, OutIt out)
 Splits string on string separator. More...
 
template<class CharT , class CharOrStr >
auto split_v (const std::basic_string< CharT > &s, const CharOrStr &sep, std::vector< std::basic_string< CharT >> &v)
 Splits string on separator, output to vector of strings. More...
 
template<class CharT , class CharOrStr >
auto split_first (const std::basic_string< CharT > &s, const CharOrStr &sep) -> std::basic_string< CharT >
 Gets the first token of a split string. More...
 
template<class CharT , class OutIt >
auto split_on_whitespace (const std::basic_string< CharT > &s, OutIt out, const std::locale &loc=std::locale()) -> OutIt
 Splits on whitespace. More...
 
template<class CharT >
auto split_on_whitespace_v (const std::basic_string< CharT > &s, std::vector< std::basic_string< CharT >> &v, const std::locale &loc=std::locale()) -> void
 Splits on whitespace, outputs to vector of strings. More...
 
template<class CharT >
auto & erase_chars (std::basic_string< CharT > &s, const std::basic_string< CharT > &erase_chars)
 
template<class CharT >
auto & replace_char (std::basic_string< CharT > &s, CharT from, CharT to)
 
template<class CharT >
auto is_number (const std::basic_string< CharT > &s) -> bool
 Tests if word is a number. More...
 
template<class DataIter , class PatternIter , class FuncEq = std::equal_to<>>
auto match_simple_regex (DataIter data_first, DataIter data_last, PatternIter pat_first, PatternIter pat_last, FuncEq eq=FuncEq())
 
template<class DataRange , class PatternRange , class FuncEq = std::equal_to<>>
auto match_simple_regex (const DataRange &data, const PatternRange &pattern, FuncEq eq=FuncEq())
 
auto match_compund_rule (const std::vector< const Flag_Set *> &words_data, const u16string &pattern)
 
template<class CharT >
auto swap (String_Set< CharT > &a, String_Set< CharT > &b)
 
template<class CharT >
auto swap (List_Basic_Strings< CharT > &a, List_Basic_Strings< CharT > &b)
 

Variables

const auto PATHSEP = ':'
 
const auto DIRSEP = '/'
 
const auto SEPARATORS = '/'
 

Detailed Description

Library main namespace.

Enumeration Type Documentation

◆ Casing

enum nuspell::Casing
strong

Casing type enum, ignoring neutral case characters.

Enumerator
SMALL 

all lower case or neutral case, e.g. "lowercase" or "123"

INIT_CAPITAL 

start upper case, rest lower case, e.g. "Initcap"

ALL_CAPITAL 

all upper case, e.g. "UPPERCASE" or "ALL4ONE"

CAMEL 

camel case, start lower case, e.g. "camelCase"

PASCAL 

pascal case, start upper case, e.g. "PascalCase"

◆ Flag_Type

Enumerator
FLAG_SINGLE_CHAR 

single-character flag, e.g. for "a"

FLAG_DOUBLE_CHAR 

double-character flag, e.g. for "aa"

FLAG_NUMBER 

numerical flag, e.g. for 61

FLAG_UTF8 

UTF-8 flag, e.g. for "á"

Function Documentation

◆ classify_casing()

template<class CharT >
auto nuspell::classify_casing ( const std::basic_string< CharT > &  s,
const std::locale &  loc 
) -> Casing

Determines casing (capitalization) type for a word.

Casing is sometimes referred to as capitalization.

Parameters
s: word for which casing is determined.
loc: locale object that takes care of case detection.
Returns
The casing type.

◆ decode_flags()

auto nuspell::decode_flags ( istream &  in,
size_t  line_num,
Flag_Type  t,
const Encoding &  enc 
) -> u16string

Decodes flags.

Expects that there are flags in the stream. If there are no flags in the stream (e.g., the stream is at EOF) or if the format of the flags is incorrect, the stream failbit will be set.

◆ decode_single_flag()

auto nuspell::decode_single_flag ( istream &  in,
size_t  line_num,
Flag_Type  t,
const Encoding enc 
) -> char16_t

Decodes a single flag from an input stream.

Parameters
in: input stream to decode from.
line_num
t
enc: encoding of the stream.
Returns
The value of the first decoded flag or 0 when no flag was decoded.

◆ dic_find_end_of_word_heuristics()

auto nuspell::dic_find_end_of_word_heuristics ( const string &  line)

Scans line for morphological field [a-z][a-z]:

Parameters
line
Returns
the end of the word before the morph field, or npos

◆ get_default_search_paths()

template<class OutIt >
auto nuspell::get_default_search_paths ( OutIt  out) -> OutIt

Gets the default search paths.

Parameters
out: Output iterator, beginning of the output range.
Returns
End of the output range.

◆ get_libreoffice_paths()

template<class OutIt >
auto nuspell::get_libreoffice_paths ( OutIt  out) -> OutIt

Gets the LibreOffice search paths.

Parameters
out: Output iterator, beginning of the output range.
Returns
End of the output range.

◆ get_mozilla_paths()

template<class OutIt >
auto nuspell::get_mozilla_paths ( OutIt  out) -> OutIt

Gets the Mozilla search paths.

Parameters
out: Output iterator, beginning of the output range.
Returns
End of the output range.

◆ get_openoffice_paths()

template<class OutIt >
auto nuspell::get_openoffice_paths ( OutIt  out) -> OutIt

Gets the Apache OpenOffice search paths.

Parameters
out: Output iterator, beginning of the output range.
Returns
End of the output range.

◆ has_uppercase_at_compound_word_boundary()

template<class CharT >
auto nuspell::has_uppercase_at_compound_word_boundary ( const std::basic_string< CharT > &  word,
size_t  i,
const std::locale &  loc 
) -> bool

Check if word[i] or word[i-1] are uppercase.

Check if the two chars are alphabetic and at least one of them is in uppercase.

Parameters
word
i
loc
Returns
true if at least one is uppercase, false otherwise.

◆ is_number()

template<class CharT >
auto nuspell::is_number ( const std::basic_string< CharT > &  s) -> bool

Tests if word is a number.

Allows numbers with dots ".", dashes "-" and commas ",", but forbids double separators such as "..", "--" and ".,". This implementation increases performance over the regex implementation in the standard library.

◆ parse_affix()

auto nuspell::parse_affix ( istream &  in,
size_t  line_num,
string &  command,
Flag_Type  t,
const Encoding &  enc,
const vector< Flag_Set > &  flag_aliases,
vector< Affix > &  vec,
unordered_map< string, pair< bool, int >> &  cmd_affix 
) -> void

Parses an affix from an input stream.

Parameters
in: input stream to parse from.
line_num
[in,out] command
t
enc
flag_aliases
[in,out] vec
[in,out] cmd_affix

◆ parse_flag_type()

auto nuspell::parse_flag_type ( istream &  in,
size_t  line_num,
Flag_Type &  flag_type 
) -> void

Parses flag type.

Parameters
in: input stream to parse from.
line_num
[out] flag_type

◆ parse_morhological_fields()

auto nuspell::parse_morhological_fields ( istream &  in,
vector< string > &  vecOut 
) -> void

Parses morphological fields.

Parameters
in: input stream to parse from.
[in,out] vecOut

◆ parse_vector_of_T()

template<class T , class Func >
auto nuspell::parse_vector_of_T ( istream &  in,
size_t  line_num,
const string &  command,
unordered_map< string, int > &  counts,
vector< T > &  vec,
Func  parseLineFunc 
) -> void

Parses vector of class T from an input stream.

Parameters
in: input stream to decode from.
line_num
command
[in,out] counts
[in,out] vec
parseLineFunc

◆ search_path_for_dicts()

template<class OutIt >
auto nuspell::search_path_for_dicts ( const string &  dir,
OutIt  out 
) -> OutIt

Searches directory for dictionaries.

Parameters
dir: directory path.
out: output iterator where the found dictionary names are appended.
Returns
end of the output range

◆ split() [1/3]

template<class CharT , class OutIt >
auto nuspell::split ( const std::basic_string< CharT > &  s,
CharT  sep,
OutIt  out 
)

Splits string on single char separator.

Consecutive separators are treated as separate and will emit empty strings.

Parameters
s: string to split.
sep: char that acts as separator to split on.
out: start of the output range where separated strings are appended.
Returns
The iterator that indicates the end of the output range.

◆ split() [2/3]

template<class CharT , class OutIt >
auto nuspell::split ( const std::basic_string< CharT > &  s,
const std::basic_string< CharT > &  sep,
OutIt  out 
)

Splits string on string separator.

Parameters
s: string to split.
sep: separator to split on.
out: start of the output range where separated strings are appended.
Returns
The end of the output range where separated strings are appended.

◆ split() [3/3]

template<class CharT , class OutIt >
auto nuspell::split ( const std::basic_string< CharT > &  s,
const CharT *  sep,
OutIt  out 
)

Splits string on string separator.

Parameters
s: string to split.
sep: separator to split on.
out: start of the output range where separated strings are appended.
Returns
The end of the output range where separated strings are appended.

◆ split_first()

template<class CharT , class CharOrStr >
auto nuspell::split_first ( const std::basic_string< CharT > &  s,
const CharOrStr &  sep 
) -> std::basic_string<CharT>

Gets the first token of a split string.

Parameters
s: string to split.
sep: char or string that acts as separator to split on.
Returns
The string that has been split off.

◆ split_on_any_of()

template<class CharT , class SepT , class OutIt >
auto nuspell::split_on_any_of ( const std::basic_string< CharT > &  s,
const SepT &  sep,
OutIt  out 
)

Splits string on set of single char separators.

Consecutive separators are treated as separate and will emit empty strings.

Parameters
s: string to split.
sep: separator(s) to split on.
out: start of the output range where separated strings are appended.
Returns
The end of the output range where separated strings are appended.

◆ split_on_whitespace()

template<class CharT , class OutIt >
auto nuspell::split_on_whitespace ( const std::basic_string< CharT > &  s,
OutIt  out,
const std::locale &  loc = std::locale() 
) -> OutIt

Splits on whitespace.

Consecutive whitespace is treated as a single separator. Behaves the same as Python's split called without a separator argument.

Parameters
s: string to split.
out: start of the output range where separated strings are appended.
loc: locale object that takes care of what is whitespace.
Returns
The iterator that indicates the end of the output range.

◆ split_on_whitespace_v()

template<class CharT >
auto nuspell::split_on_whitespace_v ( const std::basic_string< CharT > &  s,
std::vector< std::basic_string< CharT >> &  v,
const std::locale &  loc = std::locale() 
) -> void

Splits on whitespace, outputs to vector of strings.

See split_on_whitespace().

Parameters
s: string to split.
[out] v: vector with separated strings. The vector is first cleared.
loc: locale object that takes care of what is whitespace.

◆ split_v()

template<class CharT , class CharOrStr >
auto nuspell::split_v ( const std::basic_string< CharT > &  s,
const CharOrStr &  sep,
std::vector< std::basic_string< CharT >> &  v 
)

Splits string on separator, output to vector of strings.

See split().

Parameters
s: string to split.
sep: separator to split on.
[out] v: vector with separated strings. The vector is first cleared.