| #include "KBestExtractor.h" |
|
|
| #include "moses/ScoreComponentCollection.h" |
| #include "moses/StaticData.h" |
|
|
| #include <boost/scoped_ptr.hpp> |
|
|
| #include <vector> |
|
|
| namespace Moses |
| { |
| namespace Syntax |
| { |
|
|
| |
| void KBestExtractor::Extract( |
| const std::vector<boost::shared_ptr<SVertex> > &topLevelVertices, |
| std::size_t k, KBestVec &kBestList) |
| { |
| kBestList.clear(); |
| if (topLevelVertices.empty()) { |
| return; |
| } |
|
|
| |
| |
| std::vector<boost::shared_ptr<SVertex> >::const_iterator p = |
| topLevelVertices.begin(); |
| SVertex &bestTopLevelVertex = **p; |
| boost::scoped_ptr<SVertex> supremeVertex(new SVertex()); |
| supremeVertex->pvertex = 0; |
| supremeVertex->best = new SHyperedge(); |
| supremeVertex->best->head = supremeVertex.get(); |
| supremeVertex->best->tail.push_back(&bestTopLevelVertex); |
| supremeVertex->best->label.futureScore = |
| bestTopLevelVertex.best->label.futureScore; |
| supremeVertex->best->label.deltas = bestTopLevelVertex.best->label.deltas; |
| supremeVertex->best->label.translation = 0; |
|
|
| |
| |
| for (++p; p != topLevelVertices.end(); ++p) { |
| |
| UTIL_THROW_IF2((*p)->best->label.futureScore > |
| bestTopLevelVertex.best->label.futureScore, |
| "top-level SVertices are not correctly sorted"); |
| |
| |
| SHyperedge *altEdge = new SHyperedge(); |
| altEdge->head = supremeVertex.get(); |
| altEdge->tail.push_back((*p).get()); |
| altEdge->label.futureScore = (*p)->best->label.futureScore; |
| altEdge->label.deltas = (*p)->best->label.deltas; |
| altEdge->label.translation = 0; |
| supremeVertex->recombined.push_back(altEdge); |
| } |
|
|
| |
| boost::shared_ptr<KVertex> targetVertex = FindOrCreateVertex(*supremeVertex); |
| LazyKthBest(targetVertex, k, k); |
|
|
| |
| |
| kBestList.reserve(targetVertex->kBestList.size()); |
| for (std::vector<boost::weak_ptr<Derivation> >::const_iterator |
| q = targetVertex->kBestList.begin(); |
| q != targetVertex->kBestList.end(); ++q) { |
| const boost::shared_ptr<Derivation> d(*q); |
| assert(d); |
| assert(d->subderivations.size() == 1); |
| kBestList.push_back(d->subderivations[0]); |
| } |
| } |
|
|
| |
| Phrase KBestExtractor::GetOutputPhrase(const Derivation &d) |
| { |
| FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor; |
|
|
| Phrase ret(ARRAY_SIZE_INCR); |
|
|
| const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation); |
| const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = |
| phrase.GetAlignNonTerm().GetNonTermIndexMap(); |
| for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) { |
| const Word &word = phrase.GetWord(pos); |
| if (word.IsNonTerminal()) { |
| std::size_t nonTermInd = nonTermIndexMap[pos]; |
| const Derivation &subderivation = *d.subderivations[nonTermInd]; |
| Phrase subPhrase = GetOutputPhrase(subderivation); |
| ret.Append(subPhrase); |
| } else { |
| ret.AddWord(word); |
| if (placeholderFactor == NOT_FOUND) { |
| continue; |
| } |
| |
| UTIL_THROW2("placeholders are not currently supported by the S2T decoder"); |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| } |
| } |
|
|
| return ret; |
| } |
|
|
| |
| TreePointer KBestExtractor::GetOutputTree(const Derivation &d) |
| { |
| const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation); |
| if (const PhraseProperty *property = phrase.GetProperty("Tree")) { |
| const std::string *tree = property->GetValueString(); |
| TreePointer mytree (boost::make_shared<InternalTree>(*tree)); |
|
|
| |
| std::vector<TreePointer> previous_trees; |
| for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { |
| const Word &word = phrase.GetWord(pos); |
| if (word.IsNonTerminal()) { |
| size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos]; |
| const Derivation &subderivation = *d.subderivations[nonTermInd]; |
| const TreePointer prev_tree = GetOutputTree(subderivation); |
| previous_trees.push_back(prev_tree); |
| } |
| } |
|
|
| mytree->Combine(previous_trees); |
| return mytree; |
| } else { |
| UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found"); |
| } |
| } |
|
|
| |
| |
| boost::shared_ptr<KBestExtractor::KVertex> |
| KBestExtractor::FindOrCreateVertex(const SVertex &v) |
| { |
| |
| assert(v.best); |
|
|
| VertexMap::value_type element(&v, boost::shared_ptr<KVertex>()); |
| std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element); |
| boost::shared_ptr<KVertex> &sp = p.first->second; |
| if (!p.second) { |
| return sp; |
| } |
| sp.reset(new KVertex(v)); |
| |
| boost::shared_ptr<KHyperedge> bestEdge(new KHyperedge(*(v.best))); |
| bestEdge->head = sp; |
| std::size_t kTailSize = 0; |
| for (std::size_t i = 0; i < v.best->tail.size(); ++i) { |
| const SVertex *pred = v.best->tail[i]; |
| if (pred->best) { |
| ++kTailSize; |
| } |
| } |
| bestEdge->tail.reserve(kTailSize); |
| for (std::size_t i = 0; i < v.best->tail.size(); ++i) { |
| const SVertex *pred = v.best->tail[i]; |
| if (pred->best) { |
| bestEdge->tail.push_back(FindOrCreateVertex(*pred)); |
| } |
| } |
| boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge)); |
| #ifndef NDEBUG |
| std::pair<DerivationSet::iterator, bool> q = |
| #endif |
| m_derivations.insert(bestDerivation); |
| assert(q.second); |
| sp->kBestList.push_back(bestDerivation); |
| return sp; |
| } |
|
|
| |
| |
| void KBestExtractor::GetCandidates(boost::shared_ptr<KVertex> v, std::size_t k) |
| { |
| |
| |
| for (std::size_t i = 0; i < v->svertex.recombined.size(); ++i) { |
| const SHyperedge ­peredge = *(v->svertex.recombined[i]); |
| boost::shared_ptr<KHyperedge> bestEdge(new KHyperedge(shyperedge)); |
| bestEdge->head = v; |
| |
| std::size_t kTailSize = 0; |
| for (std::size_t j = 0; j < shyperedge.tail.size(); ++j) { |
| const SVertex *pred = shyperedge.tail[j]; |
| if (pred->best) { |
| ++kTailSize; |
| } |
| } |
| bestEdge->tail.reserve(kTailSize); |
| for (std::size_t j = 0; j < shyperedge.tail.size(); ++j) { |
| const SVertex *pred = shyperedge.tail[j]; |
| if (pred->best) { |
| bestEdge->tail.push_back(FindOrCreateVertex(*pred)); |
| } |
| } |
| boost::shared_ptr<Derivation> derivation(new Derivation(bestEdge)); |
| #ifndef NDEBUG |
| std::pair<DerivationSet::iterator, bool> q = |
| #endif |
| m_derivations.insert(derivation); |
| assert(q.second); |
| v->candidates.push(derivation); |
| } |
| } |
|
|
| |
| void KBestExtractor::LazyKthBest(boost::shared_ptr<KVertex> v, std::size_t k, |
| std::size_t globalK) |
| { |
| |
| if (v->visited == false) { |
| |
| assert(v->kBestList.size() == 1); |
| |
| GetCandidates(v, globalK); |
| v->visited = true; |
| } |
| |
| |
| while (v->kBestList.size() < k) { |
| assert(!v->kBestList.empty()); |
| |
| |
| boost::shared_ptr<Derivation> d(v->kBestList.back()); |
| LazyNext(*v, *d, globalK); |
| |
| if (v->candidates.empty()) { |
| break; |
| } |
| |
| boost::weak_ptr<Derivation> next = v->candidates.top(); |
| v->candidates.pop(); |
| |
| v->kBestList.push_back(next); |
| } |
| } |
|
|
| |
| void KBestExtractor::LazyNext(KVertex &v, const Derivation &d, |
| std::size_t globalK) |
| { |
| for (std::size_t i = 0; i < d.edge->tail.size(); ++i) { |
| boost::shared_ptr<KVertex> pred = d.edge->tail[i]; |
| |
| std::size_t k = d.backPointers[i] + 2; |
| LazyKthBest(pred, k, globalK); |
| if (pred->kBestList.size() < k) { |
| |
| continue; |
| } |
| |
| boost::shared_ptr<Derivation> next(new Derivation(d, i)); |
| |
| std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next); |
| if (p.second) { |
| v.candidates.push(next); |
| } |
| } |
| } |
|
|
| |
| KBestExtractor::Derivation::Derivation(const boost::shared_ptr<KHyperedge> &e) |
| { |
| edge = e; |
| const TargetPhrase *translation = edge->shyperedge.label.translation; |
| |
| |
| if (translation) { |
| scoreBreakdown = translation->GetScoreBreakdown(); |
| } |
| const std::size_t arity = edge->tail.size(); |
| backPointers.resize(arity, 0); |
| subderivations.reserve(arity); |
| for (std::size_t i = 0; i < arity; ++i) { |
| const KVertex &pred = *(edge->tail[i]); |
| assert(pred.kBestList.size() >= 1); |
| boost::shared_ptr<Derivation> sub(pred.kBestList[0]); |
| subderivations.push_back(sub); |
| scoreBreakdown.PlusEquals(sub->scoreBreakdown); |
| } |
| scoreBreakdown.PlusEquals(edge->shyperedge.label.deltas); |
| score = scoreBreakdown.GetWeightedScore(); |
| } |
|
|
| |
| KBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i) |
| { |
| edge = d.edge; |
| backPointers = d.backPointers; |
| subderivations = d.subderivations; |
| std::size_t j = ++backPointers[i]; |
| scoreBreakdown = d.scoreBreakdown; |
| |
| scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown); |
| |
| boost::shared_ptr<Derivation> newSub(edge->tail[i]->kBestList[j]); |
| subderivations[i] = newSub; |
| |
| scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown); |
| score = scoreBreakdown.GetWeightedScore(); |
| } |
|
|
| } |
| } |
|
|