| | #include "ForestWriter.h" |
| |
|
| | #include <cassert> |
| | #include <vector> |
| |
|
| | #include "TopologicalSorter.h" |
| |
|
| | namespace MosesTraining |
| | { |
| | namespace Syntax |
| | { |
| | namespace PostprocessEgretForests |
| | { |
| |
|
| | void ForestWriter::Write(const std::string &sentence, const Forest &forest, |
| | std::size_t sentNum) |
| | { |
| | m_out << "sentence " << sentNum << " :" << std::endl; |
| | m_out << PossiblyEscape(sentence) << std::endl; |
| |
|
| | |
| | if (forest.vertices.empty()) { |
| | m_out << std::endl << std::endl; |
| | return; |
| | } |
| |
|
| | |
| | std::vector<const Forest::Vertex *> vertices; |
| | TopologicalSorter sorter; |
| | sorter.Sort(forest, vertices); |
| | for (std::vector<const Forest::Vertex *>::const_iterator p = vertices.begin(); |
| | p != vertices.end(); ++p) { |
| | const Forest::Vertex &v = **p; |
| | for (std::vector<boost::shared_ptr<Forest::Hyperedge> >::const_iterator |
| | q = v.incoming.begin(); q != v.incoming.end(); ++q) { |
| | WriteHyperedgeLine(**q); |
| | } |
| | } |
| |
|
| | |
| | m_out << std::endl; |
| | } |
| |
|
| | void ForestWriter::WriteHyperedgeLine(const Forest::Hyperedge &e) |
| | { |
| | WriteVertex(*e.head); |
| | m_out << " =>"; |
| | for (std::vector<Forest::Vertex *>::const_iterator p = e.tail.begin(); |
| | p != e.tail.end(); ++p) { |
| | m_out << " "; |
| | WriteVertex(**p); |
| | } |
| | m_out << " ||| " << e.weight << std::endl; |
| | } |
| |
|
| | void ForestWriter::WriteVertex(const Forest::Vertex &v) |
| | { |
| | m_out << PossiblyEscape(v.symbol.value); |
| | if (!v.incoming.empty()) { |
| | m_out << "[" << v.start << "," << v.end << "]"; |
| | } |
| | } |
| |
|
| | std::string ForestWriter::PossiblyEscape(const std::string &s) const |
| | { |
| | if (m_options.escape) { |
| | return Escape(s); |
| | } else { |
| | return s; |
| | } |
| | } |
| |
|
| | |
| | std::string ForestWriter::Escape(const std::string &s) const |
| | { |
| | std::string t; |
| | std::size_t len = s.size(); |
| | t.reserve(len); |
| | for (std::size_t i = 0; i < len; ++i) { |
| | if (s[i] == '<') { |
| | t += "<"; |
| | } else if (s[i] == '>') { |
| | t += ">"; |
| | } else if (s[i] == '[') { |
| | t += "["; |
| | } else if (s[i] == ']') { |
| | t += "]"; |
| | } else if (s[i] == '|') { |
| | t += "|"; |
| | } else if (s[i] == '&') { |
| | t += "&"; |
| | } else if (s[i] == '\'') { |
| | t += "'"; |
| | } else if (s[i] == '"') { |
| | t += """; |
| | } else { |
| | t += s[i]; |
| | } |
| | } |
| | return t; |
| | } |
| |
|
| | } |
| | } |
| | } |
| |
|