| | #include "M2Scorer.h" |
| |
|
| | #include <algorithm> |
| | #include <fstream> |
| | #include <stdexcept> |
| | #include <sstream> |
| | #include <cstdlib> |
| |
|
| | #include <boost/lexical_cast.hpp> |
| |
|
| |
|
| | using namespace std; |
| |
|
| | namespace MosesTuning |
| | { |
| |
|
| | M2Scorer::M2Scorer(const string& config) |
| | : StatisticsBasedScorer("M2Scorer", config), |
| | beta_(Scan<float>(getConfig("beta", "0.5"))), |
| | max_unchanged_words_(Scan<int>(getConfig("max_unchanged_words", "2"))), |
| | truecase_(Scan<bool>(getConfig("truecase", "false"))), |
| | verbose_(Scan<bool>(getConfig("verbose", "false"))), |
| | m2_(max_unchanged_words_, beta_, truecase_) |
| | {} |
| |
|
| | void M2Scorer::setReferenceFiles(const vector<string>& referenceFiles) |
| | { |
| | for(size_t i = 0; i < referenceFiles.size(); ++i) { |
| | m2_.ReadM2(referenceFiles[i]); |
| | break; |
| | } |
| | } |
| |
|
| | void M2Scorer::prepareStats(size_t sid, const string& text, ScoreStats& entry) |
| | { |
| | string sentence = trimStr(this->preprocessSentence(text)); |
| | std::vector<ScoreStatsType> stats(4, 0); |
| | m2_.SufStats(sentence, sid, stats); |
| | entry.set(stats); |
| | } |
| |
|
| | float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const |
| | { |
| |
|
| | if (comps.size() != NumberOfScores()) { |
| | throw runtime_error("Size of stat vector for M2Scorer is not " + NumberOfScores()); |
| | } |
| |
|
| | float beta = beta_; |
| |
|
| |
|
| | float p = 0.0; |
| | float r = 0.0; |
| | float f = 0.0; |
| |
|
| | if(comps[1] != 0) |
| | p = comps[0] / (double)comps[1]; |
| | else |
| | p = 1.0; |
| |
|
| | if(comps[2] != 0) |
| | r = comps[0] / (double)comps[2]; |
| | else |
| | r = 1.0; |
| |
|
| | float denom = beta * beta * p + r; |
| | if(denom != 0) |
| | f = (1.0 + beta * beta) * p * r / denom; |
| | else |
| | f = 0.0; |
| |
|
| | if(verbose_) |
| | std::cerr << comps[0] << " " << comps[1] << " " << comps[2] << std::endl; |
| |
|
| | if(verbose_) |
| | std::cerr << p << " " << r << " " << f << std::endl; |
| |
|
| | return f; |
| | } |
| |
|
| | float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const |
| | { |
| | return comps[3]; |
| | } |
| |
|
| | std::vector<ScoreStatsType> randomStats(float decay, int max) |
| | { |
| | int gold = rand() % max; |
| | int prop = rand() % max; |
| | int corr = 0.0; |
| |
|
| | if(std::min(prop, gold) > 0) |
| | corr = rand() % std::min(prop, gold); |
| |
|
| | |
| |
|
| | std::vector<ScoreStatsType> stats(3, 0.0); |
| | stats[0] = corr * decay; |
| | stats[1] = prop * decay; |
| | stats[2] = gold * decay; |
| |
|
| | return stats; |
| | } |
| |
|
| | float sentenceM2(const std::vector<ScoreStatsType>& stats) |
| | { |
| | float beta = 0.5; |
| |
|
| | std::vector<ScoreStatsType> smoothStats(3, 0.0); |
| | smoothStats[0] += stats[0]; |
| | smoothStats[1] += stats[1]; |
| | smoothStats[2] += stats[2]; |
| |
|
| | float p = 0.0; |
| | float r = 0.0; |
| | float f = 0.0; |
| |
|
| | if(smoothStats[1] != 0) |
| | p = smoothStats[0] / smoothStats[1]; |
| | else |
| | p = 1.0; |
| |
|
| | if(smoothStats[2] != 0) |
| | r = smoothStats[0] / smoothStats[2]; |
| | else |
| | r = 1.0; |
| |
|
| | float denom = beta * beta * p + r; |
| | if(denom != 0) |
| | f = (1.0 + beta * beta) * p * r / denom; |
| | else |
| | f = 0.0; |
| |
|
| | return f; |
| | } |
| |
|
| | } |
| |
|