| #include <sstream> |
| #include "EditOps.h" |
| #include "moses/Phrase.h" |
| #include "moses/TargetPhrase.h" |
| #include "moses/Hypothesis.h" |
| #include "moses/ChartHypothesis.h" |
| #include "moses/ScoreComponentCollection.h" |
| #include "moses/TranslationOption.h" |
| #include "util/string_piece_hash.hh" |
| #include "util/exception.hh" |
|
|
| #include <functional> |
|
|
| #include <boost/foreach.hpp> |
| #include <boost/algorithm/string.hpp> |
|
|
| #include "Diffs.h" |
|
|
| namespace Moses |
| { |
|
|
| using namespace std; |
|
|
| std::string ParseScores(const std::string &line, const std::string& defaultScores) |
| { |
| std::vector<std::string> toks = Tokenize(line); |
| UTIL_THROW_IF2(toks.empty(), "Empty line"); |
|
|
| for (size_t i = 1; i < toks.size(); ++i) { |
| std::vector<std::string> args = TokenizeFirstOnly(toks[i], "="); |
| UTIL_THROW_IF2(args.size() != 2, |
| "Incorrect format for feature function arg: " << toks[i]); |
|
|
| if (args[0] == "scores") { |
| return args[1]; |
| } |
| } |
| return defaultScores; |
| } |
|
|
| EditOps::EditOps(const std::string &line) |
| : StatelessFeatureFunction(ParseScores(line, "dis").size(), line) |
| , m_factorType(0), m_chars(false), m_scores(ParseScores(line, "dis")) |
| { |
| std::cerr << "Initializing EditOps feature.." << std::endl; |
| ReadParameters(); |
| } |
|
|
| void EditOps::SetParameter(const std::string& key, const std::string& value) |
| { |
| if (key == "factor") { |
| m_factorType = Scan<FactorType>(value); |
| } else if (key == "chars") { |
| m_chars = Scan<bool>(value); |
| } else if (key == "scores") { |
| m_scores = value; |
| } else { |
| StatelessFeatureFunction::SetParameter(key, value); |
| } |
| } |
|
|
| void EditOps::Load() |
| { } |
|
|
| void EditOps::EvaluateInIsolation(const Phrase &source |
| , const TargetPhrase &target |
| , ScoreComponentCollection &scoreBreakdown |
| , ScoreComponentCollection &estimatedFutureScore) const |
| { |
| ComputeFeatures(source, target, &scoreBreakdown); |
| } |
|
|
| void EditOps::ComputeFeatures( |
| const Phrase &source, |
| const TargetPhrase& target, |
| ScoreComponentCollection* accumulator) const |
| { |
| std::vector<float> ops(GetNumScoreComponents(), 0); |
|
|
| if(m_chars) { |
| std::vector<FactorType> factors; |
| factors.push_back(m_factorType); |
|
|
| std::string sourceStr = source.GetStringRep(factors); |
| std::string targetStr = target.GetStringRep(factors); |
|
|
| AddStats(sourceStr, targetStr, m_scores, ops); |
| } else { |
| std::vector<std::string> sourceTokens; |
| |
| for(size_t i = 0; i < source.GetSize(); ++i) { |
| if(!source.GetWord(i).IsNonTerminal()) |
| sourceTokens.push_back(source.GetWord(i).GetFactor(m_factorType)->GetString().as_string()); |
| |
| } |
| |
|
|
| std::vector<std::string> targetTokens; |
| |
| for(size_t i = 0; i < target.GetSize(); ++i) { |
| if(!target.GetWord(i).IsNonTerminal()) |
| targetTokens.push_back(target.GetWord(i).GetFactor(m_factorType)->GetString().as_string()); |
| |
| } |
| |
|
|
| AddStats(sourceTokens, targetTokens, m_scores, ops); |
| } |
|
|
| accumulator->PlusEquals(this, ops); |
| } |
|
|
| bool EditOps::IsUseable(const FactorMask &mask) const |
| { |
| bool ret = mask[m_factorType]; |
| return ret; |
| } |
|
|
| } |
|
|