|
#include <fstream> |
|
#include "OpSequenceModel.h" |
|
#include "osmHyp.h" |
|
#include "moses/Util.h" |
|
#include "util/exception.hh" |
|
|
|
using namespace std; |
|
using namespace lm::ngram; |
|
|
|
namespace Moses |
|
{ |
|
|
|
OpSequenceModel::OpSequenceModel(const std::string &line) |
|
:StatefulFeatureFunction(5, line ) |
|
{ |
|
sFactor = 0; |
|
tFactor = 0; |
|
numFeatures = 5; |
|
ReadParameters(); |
|
load_method = util::READ; |
|
} |
|
|
|
OpSequenceModel::~OpSequenceModel() |
|
{ |
|
delete OSM; |
|
} |
|
|
|
void OpSequenceModel :: readLanguageModel(const char *lmFile) |
|
{ |
|
string unkOp = "_TRANS_SLF_"; |
|
OSM = ConstructOSMLM(m_lmPath.c_str(), load_method); |
|
|
|
State startState = OSM->NullContextState(); |
|
State endState; |
|
unkOpProb = OSM->Score(startState,unkOp,endState); |
|
} |
|
|
|
|
|
void OpSequenceModel::Load(AllOptions::ptr const& opts) |
|
{ |
|
m_options = opts; |
|
readLanguageModel(m_lmPath.c_str()); |
|
} |
|
|
|
|
|
|
|
void OpSequenceModel:: EvaluateInIsolation(const Phrase &source |
|
, const TargetPhrase &targetPhrase |
|
, ScoreComponentCollection &scoreBreakdown |
|
, ScoreComponentCollection &estimatedScores) const |
|
{ |
|
|
|
osmHypothesis obj; |
|
obj.setState(OSM->NullContextState()); |
|
Bitmap myBitmap(source.GetSize()); |
|
vector <string> mySourcePhrase; |
|
vector <string> myTargetPhrase; |
|
vector<float> scores; |
|
vector <int> alignments; |
|
int startIndex = 0; |
|
int endIndex = source.GetSize(); |
|
|
|
const AlignmentInfo &align = targetPhrase.GetAlignTerm(); |
|
AlignmentInfo::const_iterator iter; |
|
|
|
for (iter = align.begin(); iter != align.end(); ++iter) { |
|
alignments.push_back(iter->first); |
|
alignments.push_back(iter->second); |
|
} |
|
|
|
for (size_t i = 0; i < targetPhrase.GetSize(); i++) { |
|
if (targetPhrase.GetWord(i).IsOOV() && sFactor == 0 && tFactor == 0) |
|
myTargetPhrase.push_back("_TRANS_SLF_"); |
|
else |
|
myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(tFactor)->GetString().as_string()); |
|
} |
|
|
|
for (size_t i = 0; i < source.GetSize(); i++) { |
|
mySourcePhrase.push_back(source.GetWord(i).GetFactor(sFactor)->GetString().as_string()); |
|
} |
|
|
|
obj.setPhrases(mySourcePhrase , myTargetPhrase); |
|
obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize()); |
|
obj.computeOSMFeature(startIndex,myBitmap); |
|
obj.calculateOSMProb(*OSM); |
|
obj.populateScores(scores,numFeatures); |
|
estimatedScores.PlusEquals(this, scores); |
|
|
|
} |
|
|
|
|
|
FFState* OpSequenceModel::EvaluateWhenApplied( |
|
const Hypothesis& cur_hypo, |
|
const FFState* prev_state, |
|
ScoreComponentCollection* accumulator) const |
|
{ |
|
const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase(); |
|
const Bitmap &bitmap = cur_hypo.GetWordsBitmap(); |
|
Bitmap myBitmap(bitmap); |
|
const Manager &manager = cur_hypo.GetManager(); |
|
const InputType &source = manager.GetSource(); |
|
|
|
osmHypothesis obj; |
|
vector <string> mySourcePhrase; |
|
vector <string> myTargetPhrase; |
|
vector<float> scores; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const Range & sourceRange = cur_hypo.GetCurrSourceWordsRange(); |
|
int startIndex = sourceRange.GetStartPos(); |
|
int endIndex = sourceRange.GetEndPos(); |
|
const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm(); |
|
|
|
|
|
vector <int> alignments; |
|
|
|
|
|
|
|
AlignmentInfo::const_iterator iter; |
|
|
|
for (iter = align.begin(); iter != align.end(); ++iter) { |
|
|
|
alignments.push_back(iter->first); |
|
alignments.push_back(iter->second); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = startIndex; i <= endIndex; i++) { |
|
myBitmap.SetValue(i,0); |
|
mySourcePhrase.push_back(source.GetWord(i).GetFactor(sFactor)->GetString().as_string()); |
|
|
|
} |
|
|
|
for (size_t i = 0; i < target.GetSize(); i++) { |
|
|
|
if (target.GetWord(i).IsOOV() && sFactor == 0 && tFactor == 0) |
|
myTargetPhrase.push_back("_TRANS_SLF_"); |
|
else |
|
myTargetPhrase.push_back(target.GetWord(i).GetFactor(tFactor)->GetString().as_string()); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
obj.setState(prev_state); |
|
obj.constructCepts(alignments,startIndex,endIndex,target.GetSize()); |
|
obj.setPhrases(mySourcePhrase , myTargetPhrase); |
|
obj.computeOSMFeature(startIndex,myBitmap); |
|
obj.calculateOSMProb(*OSM); |
|
obj.populateScores(scores,numFeatures); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accumulator->PlusEquals(this, scores); |
|
|
|
return obj.saveState(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
FFState* OpSequenceModel::EvaluateWhenApplied( |
|
const ChartHypothesis& , |
|
int , |
|
ScoreComponentCollection* accumulator) const |
|
{ |
|
UTIL_THROW2("Chart decoding not support by OpSequenceModel"); |
|
|
|
} |
|
|
|
const FFState* OpSequenceModel::EmptyHypothesisState(const InputType &input) const |
|
{ |
|
VERBOSE(3,"OpSequenceModel::EmptyHypothesisState()" << endl); |
|
|
|
State startState = OSM->BeginSentenceState(); |
|
|
|
return new osmState(startState); |
|
} |
|
|
|
std::string OpSequenceModel::GetScoreProducerWeightShortName(unsigned idx) const |
|
{ |
|
return "osm"; |
|
} |
|
|
|
std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const Phrase &target) const |
|
{ |
|
ParallelPhrase pp(source, target); |
|
std::map<ParallelPhrase, Scores>::const_iterator iter; |
|
iter = m_futureCost.find(pp); |
|
|
|
if (iter == m_futureCost.end()) { |
|
vector<float> scores(numFeatures, 0); |
|
scores[0] = unkOpProb; |
|
return scores; |
|
} else { |
|
const vector<float> &scores = iter->second; |
|
return scores; |
|
} |
|
} |
|
|
|
void OpSequenceModel::SetParameter(const std::string& key, const std::string& value) |
|
{ |
|
|
|
if (key == "path") { |
|
m_lmPath = value; |
|
} else if (key == "support-features") { |
|
if(value == "no") |
|
numFeatures = 1; |
|
else |
|
numFeatures = 5; |
|
} else if (key == "input-factor") { |
|
sFactor = Scan<int>(value); |
|
} else if (key == "output-factor") { |
|
tFactor = Scan<int>(value); |
|
} else if (key == "load") { |
|
if (value == "lazy") { |
|
load_method = util::LAZY; |
|
} else if (value == "populate_or_lazy") { |
|
load_method = util::POPULATE_OR_LAZY; |
|
} else if (value == "populate_or_read" || value == "populate") { |
|
load_method = util::POPULATE_OR_READ; |
|
} else if (value == "read") { |
|
load_method = util::READ; |
|
} else if (value == "parallel_read") { |
|
load_method = util::PARALLEL_READ; |
|
} else { |
|
UTIL_THROW2("Unknown KenLM load method " << value); |
|
} |
|
} else { |
|
StatefulFeatureFunction::SetParameter(key, value); |
|
} |
|
} |
|
|
|
bool OpSequenceModel::IsUseable(const FactorMask &mask) const |
|
{ |
|
bool ret = mask[0]; |
|
return ret; |
|
} |
|
|
|
} |
|
|