|
Public Types |
| typedef Trie< Array > | Trie |
| typedef Trie::Iterator | Iterator |
| typedef SymbolMap< std::string, int > | SymbolMap |
| typedef std::vector< float > | FloatVec |
| typedef std::vector< int > | IntVec |
| typedef std::vector< int > | Ngram |
Public Member Functions |
| | Kneser () |
| | Default constructor.
|
| void | set_d1_model (int model) |
| | Set model used in d1 computation (0 = kn, 1 = abs).
|
| void | set_d1_weight_model (int model) |
| | Set model used as ngram weight in d1 computation (0 = kn, 1 = abs, 2 = counts).
|
| const SymbolMap & | symbol_map () const |
| | Constant access to symbol map.
|
| int | sentence_start_id () const |
| | Index of the sentence start symbol.
|
| int | sentence_end_id () const |
| | Index of the sentence end symbol.
|
| Iterator | root () const |
| | Iterator at the root of the trie.
|
| u64 | num_ngrams () const |
| | Return the number of ngrams in the unpruned model.
|
| u64 | num_active_ngrams () const |
| | Return the number of active ngrams in the pruned model.
|
| template<class T> |
| Iterator | find (const std::vector< T > &vec) const |
| | Find an ngram.
|
| Iterator | find (const std::string &str) const |
| | Find an ngram.
|
| float | ngram_prob (Iterator it) const |
| | Probability of ngram P(abc) = P(a) P(b|a) P(c|ab).
|
| float | prob_beta_lower (Iterator it) const |
| | Compute conditional probability P(w|h) using only lower-order probabilities of the beta distribution (with the interpolation weight of h).
|
| Ngram | ngram (const std::string &str) const |
| | Convert a string to vector of symbol indices.
|
| float | prob_beta_full (const Iterator &it) const |
| | Compute conditional probability P(w|h) for ngrams found explicitly in the model using the beta distribution.
|
| float | prob_beta_full (Ngram ngram) const |
| | Compute conditional probability P(w|h) for arbitrary ngram using the beta distribution (Warning: implementation may be quite slow).
|
| float | prob_lower (Iterator it) const |
| | Compute conditional probability P(w|h) using only lower-order probabilities (with the interpolation weight of h).
|
| float | prob_full (const Iterator &it, float *lower_prob=NULL) const |
| | Compute conditional probability P(w|h).
|
| float | prob_abs_lower (Iterator it) const |
| | Compute conditional probability P(w|h) using unmodified counts and using only lower-order probabilities (with the interpolation weight of h).
|
| float | prob_abs_full (const Iterator &it, float *lower_prob=NULL) const |
| | Compute conditional probability P(w|h) using unmodified counts.
|
| bool | is_pruned (const Iterator &it) const |
| | Check if iterator is pruned.
|
| u32 | get_count (const Iterator &it) const |
| | Get the ngram count at the iterator.
|
| u32 | sum_gx (const Iterator &it) const |
| u32 | sum_nonzero_xg (const Iterator &it) const |
| u32 | sum_nonzero_gx (const Iterator &it) const |
| u32 | sum_nonzero_xgx (const Iterator &it) const |
| float | get_beta_numerator (const Iterator &it) const |
| float | get_beta_denominator (const Iterator &it) const |
| | Return the beta denominator (1 for highest-order ngrams).
|
| float | get_beta_interpolation_numerator (const Iterator &it) const |
| float | get_d1 (const Iterator &it) const |
| | Return the d1 measure of an ngram.
|
| float | get_d2 (const Iterator &it) const |
| | Return the unnormalized d2 measure of an ngram.
|
| int | num_active_children (Iterator it) const |
| | Compute the number of active (not pruned) children.
|
| template<class T> |
| std::string | ngram_str (const std::vector< T > &ngram) const |
| | Printable string of an ngram.
|
| void | write_binary_counts (FILE *file) const |
| | Write trie structure, counts, modified counts, sum_nonzero_xg, sum_nonzero_xgx and sum_nonzero_gx in a file.
|
| void | read_binary_counts (FILE *file) |
| | Read trie structure, counts, modified counts, sum_nonzero_xg, sum_nonzero_xgx and sum_nonzero_gx from a file.
|
| void | write_binary_d1d2 (FILE *file) const |
| | Write d1 and d2.
|
| void | read_binary_d1d2 (FILE *file) |
| | Read d1 and d2.
|
| void | write_arpa (FILE *file) const |
| | Write model in ARPA format.
|
| void | write_beta_arpa (FILE *file) const |
| | Write model in ARPA format.
|
| void | reserve_orders (unsigned int orders) |
| | Reserve space for orders to avoid reallocating.
|
| void | read_counts (FILE *file, bool integer_symbols=false) |
| | Read counts from an ASCII file.
|
| void | compute_sums () |
| | Compute Kneser-Ney modified counts.
|
| void | compute_d1 () |
| | Compute d1 measure for all ngrams (n > 1) using log10.
|
| void | compute_d2_full () |
| | Compute d2 measure for each node when no nodes are yet pruned.
|
| void | compute_d2_trick () |
| | Compute d2 measure for all ngrams (n > 1).
|
| void | prune_ngram (Iterator it) |
| | Prune ngram, modify parents' d2 measure, and remove children.
|
| void | prune_threshold (float threshold) |
| | Prune ngrams (n > 1) whose d2 is under threshold.
|
| void | prune (unsigned int ngrams) |
| | Prune ngrams (n > 1) according to the d2 measure.
|
| void | compute_beta_numerator_terms () |
| | Precompute numerator terms for Kneser's improved back-off distribution.
|
| void | compute_beta_denominator () |
| | Precompute denominators for Kneser's improved back-off distribution.
|
| int | compute_active_children (Iterator it, int *pruned_counts=NULL) |
| | Compute number of active children and sum of pruned child counts.
|
| void | compute_beta_interpolation_numerator () |
| | Compute beta distribution and interpolation.
|
| Iterator | add (const std::vector< int > &vec, int value) |
| | Increment count.
|
| void | set_discount (unsigned int order, float value) |
| | Set the discounting parameter.
|
| float | get_discount (unsigned int order) const |
| | Get the discounting parameter of the given order.
|
| float | get_beta_discount (unsigned int order) const |
| | Get the beta discounting parameter of the given order.
|
| float | interpolation (const Iterator &it) const |
| float | interpolation_abs (const Iterator &it) const |
| void | set_count (const Iterator &it, u32 value) |
| | Set the count of the ngram at the iterator.
|
| std::string | debug_sum_nonzero_xg_str () |
| void | debug_write_counts (FILE *file) |
Private Member Functions |
| float | get_value (const std::vector< FloatVec > &arrays, const Iterator &it) const |
| int | get_value (const std::vector< IntVec > &arrays, const Iterator &it) const |
| u32 | get_value (const std::vector< Array > &arrays, const Iterator &it) const |
| void | set_value (std::vector< FloatVec > &arrays, const Iterator &it, float value) |
| void | set_value (std::vector< IntVec > &arrays, const Iterator &it, int value) |
| void | set_value (std::vector< Array > &arrays, const Iterator &it, u32 value) |
| void | add_value (std::vector< Array > &arrays, const Iterator &it, u32 value) |
| void | add_value (std::vector< FloatVec > &arrays, const Iterator &it, float value) |
| void | add_value (std::vector< IntVec > &arrays, const Iterator &it, int value) |
| void | sub_value (std::vector< Array > &arrays, const Iterator &it, u32 value) |
Private Attributes |
| int | m_progress_skip |
| | Skip between progress reports.
|
| Trie | m_trie |
| | Trie containing the ngrams.
|
| std::vector< int > | m_num_ngrams |
| | Number of ngrams in the model (updated in pruning).
|
| std::vector< IntVec > | m_counts |
| | Real counts for each order.
|
| std::vector< IntVec > | m_sum_gx |
| | Sum of counts.
|
| int | m_sum_gx0 |
| | Sum of counts for unigrams.
|
| std::vector< IntVec > | m_sum_nonzero_xg |
| | Modified counts for each order.
|
| std::vector< IntVec > | m_sum_nonzero_gx |
| | Modified normalization counts for each order.
|
| int | m_sum_nonzero_gx0 |
| | Modified normalization counts for unigrams.
|
| std::vector< IntVec > | m_sum_nonzero_xgx |
| | Precomputed sum of modified counts for each order.
|
| int | m_sum_nonzero_xgx0 |
| | Precomputed sum of modified counts for unigrams.
|
| std::vector< float > | m_discounts |
| | Discount parameters for each order (index 0 is for unigrams).
|
| std::vector< float > | m_beta_discounts |
| | Beta discount parameters for each order (index 0 is for unigrams).
|
| SymbolMap | m_symbol_map |
| | Symbol set for mapping between symbols and integers.
|
| std::string | m_sentence_start_str |
| | String of the sentence start symbol.
|
| std::string | m_sentence_end_str |
| | String of the sentence end symbol.
|
| int | m_sentence_start_id |
| | Index of the sentence start symbol.
|
| int | m_sentence_end_id |
| | Index of the sentence end symbol.
|
|
| int | m_d1_weight_model |
| | Probability distribution used for computing ngram weight in d1 computation (0 = kneser-ney, 1 = absolute-discounting, 2 = counts).
|
| int | m_d1_model |
| | Probability distribution used in d1 computation (0 = kneser-ney, 1 = absolute-discounting).
|
|
| std::vector< Array > | m_pruned |
| std::vector< FloatVec > | m_d1 |
| std::vector< FloatVec > | m_d2 |
| std::vector< IntVec > | m_d2_norm |
| std::vector< IntVec > | m_sum_xg_not_pruned |
| std::vector< IntVec > | m_sum_nonzero_xg_not_pruned |
| std::vector< FloatVec > | m_beta_denominator |
| | Precomputed denominator for Kneser's improved back-off distribution.
|
| float | m_beta_denominator0 |
| | Precomputed unigram-denominator for Kneser's improved back-off distribution.
|
| std::vector< FloatVec > | m_beta_interpolation_numerator |
| | Precomputed interpolation weights for Kneser's improved back-off distribution.
|
| float | m_beta_interpolation_numerator0 |
| | Precomputed interpolation weight for zero-grams.
|
Classes |
| struct | D2Norm |
| struct | OrderIndex |
| struct | PruneCompare |
Statistical Language Modeling Using a Variable Context Length. EUROSPEECH 1997.