| | |
| | |
| | |
| |
|
#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <n_gram.h>
#include <lmtable.h>

#include "phrasetable.h"
#include "phraselm.h"
| |
|
| | void PhraseLanguageModel::attach(PhraseInfoList &pilist) |
| | { |
| | phrase_info_list_ = &pilist; |
| | score_idx_ = pilist.register_data(1); |
| | } |
| |
|
| | void PhraseLanguageModel::compute_statistic() |
| | { |
| | compute_lmscores(*phrase_info_list_, false); |
| | } |
| |
|
| | void PhraseLanguageModel::compute_lmscores(PhraseInfoList &phrase_info_list, bool closed_world) |
| | { |
| | lmtable lm; |
| | std::ifstream lmstream(lmfile_.c_str()); |
| | lm.load(lmstream, lmfile_.c_str(), NULL, 0); |
| | lm.setlogOOVpenalty(10000000); |
| |
|
| | assert(!computation_done_); |
| |
|
| | Score marginal_score = .0; |
| | for(PhraseInfoList::iterator it = phrase_info_list.begin(); it != phrase_info_list.end(); ++it) { |
| | PhraseInfo &pi = *it; |
| | ngram ng(lm.getDict()); |
| | Score lmscore = 0; |
| | for(PhraseText::const_string_iterator it = pi.get_phrase().string_begin(); it != pi.get_phrase().string_end(); it++) { |
| | ng.pushw(it->c_str()); |
| | lmscore += lm.clprob(ng); |
| | } |
| |
|
| | pi.data(score_idx_) = exp10(lmscore); |
| | marginal_score += pi.data(score_idx_); |
| | } |
| |
|
| | if(closed_world) |
| | for(PhraseInfoList::iterator it = phrase_info_list.begin(); it != phrase_info_list.end(); ++it) { |
| | PhraseInfo &pi = *it; |
| | pi.data(score_idx_) /= marginal_score; |
| | } |
| |
|
| | computation_done_ = true; |
| | } |
| |
|
| | void ClosedPhraseLanguageModel::compute_statistic() |
| | { |
| | compute_lmscores(*phrase_info_list_, true); |
| | } |
| |
|