Nuspell
spellchecker
Classes | Typedefs | Enumerations | Functions | Variables
nuspell::v5 Namespace Reference

Library main namespace with version number attached. More...

Classes

struct  Aff_Data
 
struct  Affixing_Result
 
struct  Affixing_Result< T1, void >
 
struct  Affixing_Result< void, void >
 
struct  Affixing_Result_Base
 
class  At_Scope_Exit
 
class  Break_Table
 
struct  Checker
 
struct  Compound_Pattern
 
class  Compound_Rule_Table
 
struct  Compounding_Result
 
class  Condition
 
struct  Condition_Exception
 
class  Dict_Finder_For_CLI_Tool
 Don't use this except from the Nuspell CLI tool. More...
 
class  Dictionary
 The only important public class. More...
 
class  Dictionary_Loading_Error
 The only important public exception. More...
 
struct  Directory
 
class  Encoding
 
class  Encoding_Converter
 
struct  Extractor_Of_Appending_From_Affix
 
struct  Globber
 
class  Hash_Multimap
 
struct  identity
 
struct  Idx_And_Next_CP
 
struct  Idx_And_Next_CP_Valid
 
struct  Idx_And_Prev_CP
 
struct  Idx_And_Prev_CP_Valid
 
class  Phonetic_Table
 
struct  Prefix
 
class  Prefix_Multiset
 
class  Prefix_Table
 
class  Replacement_Table
 
class  Reversed_String_View
 
class  Setlocale_To_C_In_Scope
 
struct  Similarity_Group
 
class  String_Pair
 
struct  String_Reverser
 
class  String_Set
 
class  Subrange
 
class  Substr_Replacer
 
struct  Suffix
 
class  Suffix_Table
 
struct  Suggester
 
class  To_Root_Unroot_RAII
 
struct  U8_CP_Pos
 
class  U8_Encoded_CP
 
struct  Write_CP_Idx_and_Error
 

Typedefs

using Word_List = Hash_Multimap< std::string, Flag_Set >
 
using Flag_Set = String_Set< char16_t >
 
template<class T , class Key_Extr = identity>
using Suffix_Multiset = Prefix_Multiset< T, Key_Extr, String_Reverser< typename Prefix_Multiset< T, Key_Extr >::Char_Type > >
 
using List_Strings = std::vector< std::string >
 

Enumerations

enum  Flag_Type { SINGLE_CHAR, DOUBLE_CHAR, NUMBER, UTF8 }
 
enum  Affixing_Mode { FULL_WORD, AT_COMPOUND_BEGIN, AT_COMPOUND_END, AT_COMPOUND_MIDDLE }
 
enum  Casing : char {
  SMALL, INIT_CAPITAL, ALL_CAPITAL, CAMEL,
  PASCAL
}
 

Functions

auto dic_find_end_of_word_heuristics (const string &line)
 
auto match_compound_pattern (const Compound_Pattern &p, string_view word, size_t i, Compounding_Result first, Compounding_Result second)
 
auto is_compound_forbidden_by_patterns (const vector< Compound_Pattern > &patterns, string_view word, size_t i, Compounding_Result first, Compounding_Result second)
 
auto are_three_code_points_equal (string_view word, size_t i) -> bool
 
template<class AffixT >
auto is_modiying_affix (const AffixT &a)
 
template<class AffixInner , class AffixOuter >
auto cross_valid_inner_outer (const AffixInner &inner, const AffixOuter &outer)
 
template<class Affix >
auto cross_valid_inner_outer (const Flag_Set &word_flags, const Affix &afx)
 
auto append_default_dir_paths (std::vector< string > &paths) -> void
 Append the paths of the default directories to be searched for dictionaries. More...
 
auto append_libreoffice_dir_paths (std::vector< std::string > &paths) -> void
 Append the paths of LibreOffice's directories to be searched for dictionaries. More...
 
auto search_dir_for_dicts (const string &dir_path, vector< pair< string, string >> &dict_list) -> void
 Search a directory for dictionaries. More...
 
auto search_dirs_for_dicts (const std::vector< string > &dir_paths, std::vector< std::pair< string, string >> &dict_list) -> void
 Search the directories for dictionaries. More...
 
auto search_default_dirs_for_dicts (std::vector< std::pair< std::string, std::string >> &dict_list) -> void
 Search the default directories for dictionaries. More...
 
auto find_dictionary (const std::vector< std::pair< std::string, std::string >> &dict_list, const std::string &dict_name) -> std::vector< std::pair< std::string, std::string >>::const_iterator
 Find dictionary path given the name. More...
 
NUSPELL_EXPORT auto append_default_dir_paths (std::vector< std::string > &paths) -> void
 
NUSPELL_EXPORT auto search_dir_for_dicts (const std::string &dir_path, std::vector< std::pair< std::string, std::string >> &dict_list) -> void
 
NUSPELL_EXPORT auto search_dirs_for_dicts (const std::vector< std::string > &dir_paths, std::vector< std::pair< std::string, std::string >> &dict_list) -> void
 
template<class Range >
 Subrange (const Range &r) -> Subrange< typename Range::const_iterator >
 
template<class Range >
 Subrange (Range &r) -> Subrange< typename Range::iterator >
 
template<class CharT >
auto swap (String_Set< CharT > &a, String_Set< CharT > &b)
 
template<class DataIter , class PatternIter , class FuncEq = std::equal_to<>>
auto match_simple_regex (DataIter data_first, DataIter data_last, PatternIter pat_first, PatternIter pat_last, FuncEq eq=FuncEq())
 
template<class DataRange , class PatternRange , class FuncEq = std::equal_to<>>
auto match_simple_regex (const DataRange &data, const PatternRange &pattern, FuncEq eq=FuncEq())
 
auto match_compund_rule (const std::vector< const Flag_Set * > &words_data, const std::u16string &pattern)
 
auto & operator|= (Suggester::High_Quality_Sugs &lhs, Suggester::High_Quality_Sugs rhs)
 
auto u8_is_cp_error (int32_t cp) -> bool
 
template<class Range >
auto u8_advance_cp (const Range &str, size_t &i, int32_t &cp) -> void
 
template<class Range >
auto u8_advance_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto u8_reverse_cp (const Range &str, size_t &i, int32_t &cp) -> void
 
template<class Range >
auto u8_reverse_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto u8_write_cp_and_advance (Range &buf, size_t &i, int32_t cp, bool &error) -> void
 
template<class Range >
auto valid_u8_advance_cp (const Range &str, size_t &i, char32_t &cp) -> void
 
template<class Range >
auto valid_u8_advance_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto valid_u8_reverse_cp (const Range &str, size_t &i, char32_t &cp) -> void
 
template<class Range >
auto valid_u8_reverse_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto valid_u8_write_cp_and_advance (Range &buf, size_t &i, char32_t cp) -> void
 
auto u16_is_cp_error (int32_t cp) -> bool
 
template<class Range >
auto u16_advance_cp (const Range &str, size_t &i, int32_t &cp) -> void
 
template<class Range >
auto u16_advance_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto u16_reverse_cp (const Range &str, size_t &i, int32_t &cp) -> void
 
template<class Range >
auto u16_reverse_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto u16_write_cp_and_advance (Range &buf, size_t &i, int32_t cp, bool &error) -> void
 
template<class Range >
auto valid_u16_advance_cp (const Range &str, size_t &i, char32_t &cp) -> void
 
template<class Range >
auto valid_u16_advance_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto valid_u16_reverse_cp (const Range &str, size_t &i, char32_t &cp) -> void
 
template<class Range >
auto valid_u16_reverse_index (const Range &str, size_t &i) -> void
 
template<class Range >
auto valid_u16_write_cp_and_advance (Range &buf, size_t &i, char32_t cp) -> void
 
auto u8_swap_adjacent_cp (std::string &str, size_t i1, size_t i2, size_t i3) -> size_t
 
auto u8_swap_cp (std::string &str, U8_CP_Pos pos1, U8_CP_Pos pos2) -> std::pair< size_t, size_t >
 
template<class Range >
auto u8_next_cp (const Range &str, size_t i) -> Idx_And_Next_CP
 
template<class Range >
auto u8_next_index (const Range &str, size_t i) -> size_t
 
template<class Range >
auto u8_prev_cp (const Range &str, size_t i) -> Idx_And_Prev_CP
 
template<class Range >
auto u8_prev_index (const Range &str, size_t i) -> size_t
 
template<class Range >
auto u8_write_cp (Range &buf, size_t i, int32_t cp) -> Write_CP_Idx_and_Error
 
template<class Range >
auto valid_u8_next_cp (const Range &str, size_t i) -> Idx_And_Next_CP_Valid
 
template<class Range >
auto valid_u8_next_index (const Range &str, size_t i) -> size_t
 
template<class Range >
auto valid_u8_prev_cp (const Range &str, size_t i) -> Idx_And_Prev_CP_Valid
 
template<class Range >
auto valid_u8_prev_index (const Range &str, size_t i) -> size_t
 
template<class Range >
auto valid_u8_write_cp (Range &buf, size_t i, int32_t cp) -> size_t
 
auto split (std::string_view s, char sep, std::vector< std::string > &out) -> std::vector< std::string > &
 
auto split_on_any_of (std::string_view s, const char *sep, std::vector< std::string > &out) -> std::vector< std::string > &
 
auto utf32_to_utf8 (std::u32string_view in, std::string &out) -> void
 
auto utf32_to_utf8 (std::u32string_view in) -> std::string
 
auto valid_utf8_to_32 (std::string_view in, std::u32string &out) -> void
 
auto valid_utf8_to_32 (std::string_view in) -> std::u32string
 
auto utf8_to_16 (std::string_view in) -> std::u16string
 
bool utf8_to_16 (std::string_view in, std::u16string &out)
 
bool validate_utf8 (string_view s)
 
auto is_all_ascii (std::string_view s) -> bool
 
auto latin1_to_ucs2 (std::string_view s) -> std::u16string
 
auto latin1_to_ucs2 (std::string_view s, std::u16string &out) -> void
 
auto is_all_bmp (std::u16string_view s) -> bool
 
auto to_upper_ascii (std::string &s) -> void
 
auto to_upper (std::string_view in, const icu::Locale &loc) -> std::string
 
auto to_title (std::string_view in, const icu::Locale &loc) -> std::string
 
auto to_lower (std::string_view in, const icu::Locale &loc) -> std::string
 
auto to_upper (string_view in, const icu::Locale &loc, string &out) -> void
 
auto to_title (string_view in, const icu::Locale &loc, string &out) -> void
 
auto to_lower (u32string_view in, const icu::Locale &loc, u32string &out) -> void
 
auto to_lower (string_view in, const icu::Locale &loc, string &out) -> void
 
auto to_lower_char_at (std::string &s, size_t i, const icu::Locale &loc) -> void
 
auto to_title_char_at (std::string &s, size_t i, const icu::Locale &loc) -> void
 
auto classify_casing (string_view s) -> Casing
 
auto has_uppercase_at_compound_word_boundary (string_view word, size_t i) -> bool
 
auto replace_ascii_char (string &s, char from, char to) -> void
 
auto erase_chars (string &s, string_view erase_chars) -> void
 
auto is_number (string_view s) -> bool
 
auto count_appereances_of (string_view haystack, string_view needles) -> size_t
 
auto validate_utf8 (std::string_view s) -> bool
 
auto to_upper (std::string_view in, const icu::Locale &loc, std::string &out) -> void
 
auto to_title (std::string_view in, const icu::Locale &loc, std::string &out) -> void
 
auto to_lower (std::u32string_view in, const icu::Locale &loc, std::u32string &out) -> void
 
auto to_lower (std::string_view in, const icu::Locale &loc, std::string &out) -> void
 
NUSPELL_EXPORT auto classify_casing (std::string_view s) -> Casing
 
auto has_uppercase_at_compound_word_boundary (std::string_view word, size_t i) -> bool
 
auto replace_ascii_char (std::string &s, char from, char to) -> void
 
auto erase_chars (std::string &s, std::string_view erase_chars) -> void
 
NUSPELL_EXPORT auto is_number (std::string_view s) -> bool
 
auto count_appereances_of (std::string_view haystack, std::string_view needles) -> size_t
 
auto begins_with (std::string_view haystack, std::string_view needle) -> bool
 
auto ends_with (std::string_view haystack, std::string_view needle) -> bool
 
template<class T >
auto begin_ptr (T &x)
 
template<class T >
auto end_ptr (T &x)
 

Variables

const auto PATHSEP = ':'
 
const auto DIRSEP = '/'
 
constexpr auto u8_max_cp_length = U8_MAX_LENGTH
 
constexpr auto u16_max_cp_length = U16_MAX_LENGTH
 

Detailed Description

Library main namespace with version number attached.

This inline namespace is used for ABI versioning. Its number is the same as the library's major version. Look up "inline namespace ABI versioning" on the Internet to see what it is for. Client code should never mention this inline namespace.

Function Documentation

◆ append_default_dir_paths()

auto nuspell::v5::append_default_dir_paths ( std::vector< string > &  paths) -> void

Append the paths of the default directories to be searched for dictionaries.

Parameters
paths: vector of directory paths to append to

◆ append_libreoffice_dir_paths()

NUSPELL_EXPORT auto nuspell::v5::append_libreoffice_dir_paths ( std::vector< std::string > &  paths) -> void

Append the paths of LibreOffice's directories to be searched for dictionaries.

Warning
This function shall not be called from LibreOffice or modules that may end up being used by LibreOffice. It is mainly intended to be used by the CLI tool.
Parameters
paths: vector of directory paths to append to

◆ find_dictionary()

NUSPELL_EXPORT auto nuspell::v5::find_dictionary ( const std::vector< std::pair< std::string, std::string >> &  dict_list,
const std::string &  dict_name 
) -> std::vector< std::pair< std::string, std::string >>::const_iterator

Find dictionary path given the name.

Find the first dictionary whose name matches dict_name.

Parameters
dict_list: vector of pairs with name and paths
dict_name: dictionary name
Returns
iterator of dict_list that points to the found dictionary or end if not found.

◆ search_default_dirs_for_dicts()

NUSPELL_EXPORT auto nuspell::v5::search_default_dirs_for_dicts ( std::vector< std::pair< std::string, std::string >> &  dict_list) -> void

Search the default directories for dictionaries.

See also
append_default_dir_paths()
search_dirs_for_dicts()
Parameters
dict_list: vector to append the found dictionaries to

◆ search_dir_for_dicts()

auto nuspell::v5::search_dir_for_dicts ( const string &  dir_path,
vector< pair< string, string >> &  dict_list 
) -> void

Search a directory for dictionaries.

This function searches the directory for files that represent a dictionary, and for each one found it appends to dict_list a pair consisting of the dictionary name and the file path to the dictionary, both without the filename extension (.aff or .dic).

For example for the files /dict/dir/en_US.dic and /dict/dir/en_US.aff the following pair will be appended ("en_US", "/dict/dir/en_US").

Todo:
At some point this API should be made more strongly typed. Instead of using a pair of strings to represent the dictionary files, a new class should be created with three public getter functions that return the name, the path to the .aff file (with filename extension, to avoid confusion), and the path to the .dic file. The C++17 std::filesystem::path should probably be used. What this class holds privately is unspecified to the public, but it should probably hold only one path, to the .aff file. For the directory paths it is simple: just use the type std::filesystem::path. When this API is created, the same function names should be used, added as overloads, and the old API should be marked as deprecated. This should be done when we start requiring GCC 9, which supports the C++17 filesystem library out of the box. GCC 8 has it too, but it is somewhat experimental and requires manually linking to an additional static library.
Parameters
dir_path: path to directory
dict_list: vector to append the found dictionaries to

◆ search_dirs_for_dicts()

auto nuspell::v5::search_dirs_for_dicts ( const std::vector< string > &  dir_paths,
std::vector< std::pair< string, string >> &  dict_list 
) -> void

Search the directories for dictionaries.

See also
search_dir_for_dicts()
Parameters
dir_paths: list of paths to directories
dict_list: vector to append the found dictionaries to