| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #include <limits> |
| #include <iostream> |
| #include <fstream> |
|
|
| #include "MaxEntSRI.h" |
| #include "moses/TypeDef.h" |
| #include "moses/Util.h" |
| #include "moses/FactorCollection.h" |
| #include "moses/Phrase.h" |
| #include "moses/StaticData.h" |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| #ifdef __APPLE__ |
| #define HAVE_ZOPEN |
| #endif |
|
|
| #include "Vocab.h" |
| #include "MEModel.h" |
|
|
| using namespace std; |
|
|
| namespace Moses |
| { |
| LanguageModelMaxEntSRI::LanguageModelMaxEntSRI(const std::string &line) |
| :LanguageModelSingleFactor(line) |
| ,m_srilmVocab(0) |
| ,m_srilmModel(0) |
| { |
| ReadParameters(); |
| } |
|
|
| LanguageModelMaxEntSRI::~LanguageModelMaxEntSRI() |
| { |
| delete m_srilmModel; |
| delete m_srilmVocab; |
| } |
|
|
| void LanguageModelMaxEntSRI::Load(AllOptions::ptr const& opts) |
| { |
| m_srilmVocab = new ::Vocab(); |
| m_srilmModel = new MEModel(*m_srilmVocab, m_nGramOrder); |
|
|
| m_srilmModel->skipOOVs() = false; |
|
|
| File file( m_filePath.c_str(), "r" ); |
| m_srilmModel->read(file); |
|
|
| |
| CreateFactors(); |
| m_unknownId = m_srilmVocab->unkIndex(); |
| } |
|
|
| void LanguageModelMaxEntSRI::CreateFactors() |
| { |
| |
| FactorCollection &factorCollection = FactorCollection::Instance(); |
|
|
| std::map<size_t, VocabIndex> lmIdMap; |
| size_t maxFactorId = 0; |
|
|
| VocabString str; |
| VocabIter iter(*m_srilmVocab); |
| while ( (str = iter.next()) != NULL) { |
| VocabIndex lmId = GetLmID(str); |
| size_t factorId = factorCollection.AddFactor(Output, m_factorType, str)->GetId(); |
| lmIdMap[factorId] = lmId; |
| maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; |
| } |
|
|
| size_t factorId; |
|
|
| m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_); |
| factorId = m_sentenceStart->GetId(); |
| lmIdMap[factorId] = GetLmID(BOS_); |
| maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; |
| m_sentenceStartWord[m_factorType] = m_sentenceStart; |
|
|
| m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_); |
| factorId = m_sentenceEnd->GetId(); |
| lmIdMap[factorId] = GetLmID(EOS_); |
| maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; |
| m_sentenceEndWord[m_factorType] = m_sentenceEnd; |
|
|
| |
| m_lmIdLookup.resize(maxFactorId+1); |
|
|
| fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId); |
|
|
| map<size_t, VocabIndex>::iterator iterMap; |
| for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap) { |
| m_lmIdLookup[iterMap->first] = iterMap->second; |
| } |
| } |
|
|
| VocabIndex LanguageModelMaxEntSRI::GetLmID( const std::string &str ) const |
| { |
| return m_srilmVocab->getIndex( str.c_str(), m_unknownId ); |
| } |
| VocabIndex LanguageModelMaxEntSRI::GetLmID( const Factor *factor ) const |
| { |
| size_t factorId = factor->GetId(); |
| return ( factorId >= m_lmIdLookup.size()) ? m_unknownId : m_lmIdLookup[factorId]; |
| } |
|
|
| LMResult LanguageModelMaxEntSRI::GetValue(VocabIndex wordId, VocabIndex *context) const |
| { |
| LMResult ret; |
| ret.score = FloorScore(TransformLMScore(m_srilmModel->wordProb( wordId, context))); |
| ret.unknown = (wordId == m_unknownId); |
| return ret; |
| } |
|
|
| LMResult LanguageModelMaxEntSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState) const |
| { |
| LMResult ret; |
| FactorType factorType = GetFactorType(); |
| size_t count = contextFactor.size(); |
| if (count <= 0) { |
| if(finalState) |
| *finalState = NULL; |
| ret.score = 0.0; |
| ret.unknown = false; |
| return ret; |
| } |
|
|
| |
| VocabIndex ngram[count + 1]; |
| for (size_t i = 0 ; i < count - 1 ; i++) { |
| ngram[i+1] = GetLmID((*contextFactor[count-2-i])[factorType]); |
| } |
| ngram[count] = Vocab_None; |
|
|
| UTIL_THROW_IF2((*contextFactor[count-1])[factorType] == NULL, |
| "No factor " << factorType << " at position " << (count-1)); |
| |
| VocabIndex lmId = GetLmID((*contextFactor[count-1])[factorType]); |
| ret = GetValue(lmId, ngram+1); |
|
|
| if (finalState) { |
| ngram[0] = lmId; |
| unsigned int dummy; |
| *finalState = m_srilmModel->contextID(ngram, dummy); |
| } |
| return ret; |
| } |
|
|
| } |
|
|
|
|
|
|
|
|