#include <SimpleKneser.hh>
Public Types | |
| typedef SymbolMap< std::string, int > | SymbolMap |
| typedef std::vector< int > | Ngram |
| typedef std::vector< std::string > | StrNgram |
| typedef std::vector< int > | IntVec |
| typedef std::vector< float > | FloatVec |
| typedef std::map< Ngram, int > | IntMap |
| typedef std::map< Ngram, float > | FloatMap |
| typedef std::map< Ngram, FloatInt > | FloatIntMap |
Public Member Functions | |
| SimpleKneser () | |
| SimpleKneser (const std::string &str) | |
| void | init () |
| void | set_discounts (const std::string &str) |
| float | get_discount (unsigned int order) const |
| float | get_beta_discount (unsigned int order) const |
| Ngram | ngram (const std::string &str) const |
| int | get_count (const Ngram &ngram) const |
| int | get_sum_nonzero_xg (const Ngram &ngram) const |
| int | get_sum_nonzero_xgx (const Ngram &ngram) const |
| int | get_sum_nonzero_gx (const Ngram &ngram) const |
| int | get_sum_xg_not_pruned (const Ngram &ngram) const |
| int | get_sum_nonzero_xg_not_pruned (const Ngram &ngram) const |
| int | get_sum_nonzero_gx_not_pruned (const Ngram &ngram) const |
| float | get_d1 (const Ngram &ngram) const |
| float | get_d2 (const Ngram &ngram) const |
| const FloatInt & | get_d2_pair (const Ngram &ngram) const |
| FloatInt & | get_d2_pair (const Ngram &ngram) |
| float | get_beta_numerator (const Ngram &ngram) const |
| float | get_beta_denominator (const Ngram &ngram) const |
| void | add_count (const Ngram &ngram, int count) |
| void | read_counts (FILE *file) |
| void | compute_modified_counts () |
| float | ngram_prob (Ngram ngram) const |
| void | compute_d1 () |
| void | compute_d2 () |
| void | compute_beta_denominator () |
| bool | is_pruned (const Ngram &ngram) const |
| void | prune_ngram (Ngram ngram) |
Prune the given ngram (and any children it has), and update the d2 measure of its parent ngrams. | |
| void | prune (float threshold) |
| Ngram | parent (const Ngram &ngram) const |
| Ngram | backoff (const Ngram &ngram) const |
| float | inter_kn (const Ngram &ngram) const |
| float | prob_kn_lower (Ngram ngram) const |
| float | prob_kn (const Ngram &ngram) const |
| float | inter_beta (const Ngram &ngram) const |
| float | prob_beta_lower (Ngram ngram) const |
| float | prob_beta (const Ngram &ngram) const |
| std::string | ngram_str (const Ngram &ngram) |
| void | write_beta_arpa (FILE *file) |
Private Attributes | |
| std::string | m_sentence_start_str |
| int | m_sentence_start_id |
| int | m_num_events |
| int | m_progress_skip |
| SymbolMap | m_symbol_map |
| FloatVec | m_discounts |
| FloatVec | m_beta_discounts |
| IntMap | m_counts |
| IntMap | m_sum_nonzero_xg |
| IntMap | m_sum_nonzero_xgx |
| IntMap | m_sum_nonzero_gx |
| IntMap | m_sum_xg_not_pruned |
| IntMap | m_sum_nonzero_xg_not_pruned |
| IntMap | m_sum_nonzero_gx_not_pruned |
| FloatMap | m_beta_denominator |
| FloatMap | m_d1 |
| FloatIntMap | m_d2 |
Classes | |
| struct | FloatInt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
||||||||||||
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Prune the given ngram (and any children it has), and update the d2 measure of its parent ngrams. Pruned ngrams are marked by setting the integer denominator of their d2 value to zero.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1.4.6