|
#include "InterpolatedScorer.h" |
|
#include "ScorerFactory.h" |
|
#include "Util.h" |
|
|
|
using namespace std; |
|
|
|
namespace MosesTuning |
|
{ |
|
|
|
|
|
|
|
|
|
/**
 * Builds an interpolated scorer out of several sub-scorers.
 *
 * @param name   Comma-separated list of scorer type names. Each name is
 *               handed to ScorerFactory::getScorer together with @p config.
 * @param config Configuration string, forwarded to the Scorer base class and
 *               to every sub-scorer. Its "weights" setting, when non-empty,
 *               is read as a '+'-separated list holding one weight per
 *               sub-scorer; when empty, all sub-scorers get equal weight.
 * @throws runtime_error if no scorer was created, if the number of weights
 *         differs from the number of scorers, or if the weights sum to zero.
 */
InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
  : Scorer(name,config)
{
  // Presumably getNextPound strips the leading token (and its delimiter)
  // from its first argument on each call; the loop relies on that to end.
  string remaining = name;
  while (remaining.length() > 0) {
    string scorertype = "";
    getNextPound(remaining, scorertype, ",");
    Scorer *scorer = ScorerFactory::getScorer(scorertype, config);
    m_scorers.push_back(scorer);
  }
  if (m_scorers.size() == 0) {
    throw runtime_error("There are no scorers");
  }
  cerr << "Number of scorers: " << m_scorers.size() << endl;

  string wtype = getConfig("weights","");

  if (wtype.length() == 0) {
    // No weights configured: interpolate uniformly.
    const float weight = 1.0 / m_scorers.size();
    for (size_t i = 0; i < m_scorers.size(); ++i) {
      m_scorer_weights.push_back(weight);
    }
  } else {
    float tot = 0;
    while (wtype.length() > 0) {
      string scoreweight = "";
      getNextPound(wtype, scoreweight, "+");
      // atof yields 0 for text it cannot parse.
      const float weight = atof(scoreweight.c_str());
      m_scorer_weights.push_back(weight);
      tot += weight;
    }

    // Validate before normalising so a bad configuration fails early.
    if (m_scorers.size() != m_scorer_weights.size()) {
      throw runtime_error("The number of weights does not equal the number of scorers!");
    }

    // Dividing by a zero sum would turn every weight into NaN or infinity,
    // which score() cannot detect with its `weight == 0` check.
    if (tot == 0) {
      throw runtime_error("The scorer weights sum to zero and cannot be normalised");
    }

    // Rescale so that the weights sum to one.
    if (tot != float(1)) {
      for (vector<float>::iterator it = m_scorer_weights.begin();
           it != m_scorer_weights.end(); ++it) {
        *it /= tot;
      }
    }
  }

  cerr << "The weights for the interpolated scorers are: " << endl;
  for (vector<float>::iterator it = m_scorer_weights.begin();
       it != m_scorer_weights.end(); ++it) {
    cerr << *it << " " ;
  }
  cerr << endl;
}
|
|
|
bool InterpolatedScorer::useAlignment() const |
|
{ |
|
|
|
for (vector<Scorer*>::const_iterator itsc = m_scorers.begin(); itsc < m_scorers.end(); itsc++) { |
|
if ((*itsc)->useAlignment()) { |
|
|
|
return true; |
|
} |
|
} |
|
return false; |
|
}; |
|
|
|
/**
 * Stores @p data and gives every sub-scorer its own slice of the statistics.
 *
 * Each ScoreStats entry of @p data is read as the concatenation of the
 * sub-scorers' statistics in scorer order, where a scorer contributes
 * NumberOfScores() consecutive values. For every sub-scorer a new ScoreData
 * holding only that scorer's values is built, appended to
 * m_scorers_score_data and passed to the sub-scorer's setScoreData().
 *
 * @param data Combined score data. It is dereferenced without a null check.
 */
void InterpolatedScorer::setScoreData(ScoreData* data)
{
  m_score_data = data;

  size_t first = 0;  // index of the current scorer's first statistic
  for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
       itsc != m_scorers.end(); ++itsc) {
    const size_t width = (*itsc)->NumberOfScores();
    const size_t end = first + width;

    // Register the new object before filling it, so it is already reachable
    // from m_scorers_score_data if the fill loop throws.
    // NOTE(review): presumably m_scorers_score_data owns its elements — confirm.
    ScoreData* newData = new ScoreData(*itsc);
    m_scorers_score_data.push_back(newData);

    for (size_t i = 0; i < data->size(); i++) {
      // Read through the accessors instead of copying the whole n-best
      // array and every stats entry by value.
      const size_t numNBest = data->get(i).size();

      ScoreArray newScoreArray;
      for (size_t j = 0; j < numNBest; j++) {
        ScoreStats newScoreStats;
        for (size_t k = first; k < end; k++) {
          const ScoreStatsType value = data->get(i, j).get(k);
          newScoreStats.add(value);
        }
        newScoreArray.add(newScoreStats);
      }
      newScoreArray.setIndex(i);
      newData->add(newScoreArray);
    }

    (*itsc)->setScoreData(newData);
    first = end;
  }
}
|
|
|
|
|
|
|
|
|
void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& diffs, |
|
statscores_t& scores) const |
|
{ |
|
|
|
size_t scorerNum = 0; |
|
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin(); |
|
itsc != m_scorers.end(); ++itsc) { |
|
|
|
statscores_t tscores; |
|
(*itsc)->score(candidates,diffs,tscores); |
|
size_t inc = 0; |
|
for (statscores_t::iterator itstatsc = tscores.begin(); |
|
itstatsc != tscores.end(); ++itstatsc) { |
|
|
|
float weight = m_scorer_weights[scorerNum]; |
|
if (weight == 0) { |
|
stringstream msg; |
|
msg << "No weights for scorer" << scorerNum ; |
|
throw runtime_error(msg.str()); |
|
} |
|
if (scorerNum == 0) { |
|
scores.push_back(weight * (*itstatsc)); |
|
} else { |
|
scores[inc] += weight * (*itstatsc); |
|
} |
|
|
|
inc++; |
|
|
|
} |
|
scorerNum++; |
|
} |
|
|
|
} |
|
|
|
|
|
|
|
float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& totals) const |
|
{ |
|
size_t scorerNum = 0; |
|
size_t last = 0; |
|
float score = 0; |
|
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin(); |
|
itsc != m_scorers.end(); ++itsc) { |
|
int numScoresScorer = (*itsc)->NumberOfScores(); |
|
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer); |
|
score += (*itsc)->calculateScore(totals_scorer) * m_scorer_weights[scorerNum]; |
|
last += numScoresScorer; |
|
scorerNum++; |
|
} |
|
return score; |
|
} |
|
|
|
|
|
float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const |
|
{ |
|
size_t scorerNum = 0; |
|
size_t last = 0; |
|
float refLen = 0; |
|
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin(); |
|
itsc != m_scorers.end(); ++itsc) { |
|
int numScoresScorer = (*itsc)->NumberOfScores(); |
|
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer); |
|
refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum]; |
|
last += numScoresScorer; |
|
scorerNum++; |
|
} |
|
return refLen; |
|
} |
|
|
|
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles) |
|
{ |
|
for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin(); |
|
itsc != m_scorers.end(); ++itsc) { |
|
(*itsc)->setReferenceFiles(referenceFiles); |
|
} |
|
} |
|
|
|
void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry) |
|
{ |
|
stringstream buff; |
|
string align = text; |
|
string sentence = text; |
|
size_t alignmentData = text.find("|||"); |
|
|
|
if(alignmentData != string::npos) { |
|
getNextPound(align,sentence, "|||"); |
|
} |
|
|
|
int i = 0; |
|
for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin(); itsc != m_scorers.end(); ++itsc) { |
|
ScoreStats tempEntry; |
|
if ((*itsc)->useAlignment()) { |
|
(*itsc)->prepareStats(sid, text, tempEntry); |
|
} else { |
|
(*itsc)->prepareStats(sid, sentence, tempEntry); |
|
} |
|
if (i > 0) buff << " "; |
|
buff << tempEntry; |
|
i++; |
|
} |
|
|
|
string str = buff.str(); |
|
entry.set(str); |
|
} |
|
|
|
void InterpolatedScorer::setFactors(const string& factors) |
|
{ |
|
if (factors.empty()) return; |
|
|
|
vector<string> fsplit; |
|
split(factors, ',', fsplit); |
|
|
|
if (fsplit.size() != m_scorers.size()) |
|
throw runtime_error("Number of factor specifications does not equal number of interpolated scorers."); |
|
|
|
for (size_t i = 0; i < m_scorers.size(); ++i) { |
|
m_scorers[i]->setFactors(fsplit[i]); |
|
} |
|
} |
|
|
|
void InterpolatedScorer::setFilter(const string& filterCommand) |
|
{ |
|
if (filterCommand.empty()) return; |
|
|
|
vector<string> csplit; |
|
split(filterCommand, ',', csplit); |
|
|
|
if (csplit.size() != m_scorers.size()) |
|
throw runtime_error("Number of command specifications does not equal number of interpolated scorers."); |
|
|
|
for (size_t i = 0; i < m_scorers.size(); ++i) { |
|
m_scorers[i]->setFilter(csplit[i]); |
|
} |
|
} |
|
|
|
} |
|
|