| #include "ForestParser.h" |
|
|
| #include <istream> |
| #include <string> |
|
|
| #include <boost/make_shared.hpp> |
|
|
| #include "util/tokenize_piece.hh" |
|
|
| #include "syntax-common/exception.h" |
|
|
| namespace MosesTraining |
| { |
| namespace Syntax |
| { |
| namespace PostprocessEgretForests |
| { |
|
|
| ForestParser::ForestParser() |
| : m_input(0) |
| { |
| } |
|
|
| ForestParser::ForestParser(std::istream &input) |
| : m_input(&input) |
| { |
| ++(*this); |
| } |
|
|
| ForestParser &ForestParser::operator++() |
| { |
| if (!m_input) { |
| return *this; |
| } |
| m_vertexSet.clear(); |
| m_entry.forest.vertices.clear(); |
| if (!std::getline(*m_input, m_tmpLine)) { |
| m_input = 0; |
| return *this; |
| } |
| |
| ParseSentenceNumLine(m_tmpLine, m_entry.sentNum); |
| |
| std::getline(*m_input, m_entry.sentence); |
| |
| |
| std::getline(*m_input, m_tmpLine); |
| if (m_tmpLine == "") { |
| std::getline(*m_input, m_tmpLine); |
| assert(m_tmpLine == ""); |
| return *this; |
| } |
| while (m_tmpLine != "") { |
| ParseHyperedgeLine(m_tmpLine, m_entry.forest); |
| std::getline(*m_input, m_tmpLine); |
| } |
| return *this; |
| } |
|
|
| boost::shared_ptr<Forest::Vertex> ForestParser::AddVertex(const VertexSP &v) |
| { |
| std::pair<VertexSet::iterator, bool> ret = m_vertexSet.insert(v); |
| if (ret.second) { |
| m_entry.forest.vertices.push_back(*ret.first); |
| } |
| return *ret.first; |
| } |
|
|
| void ForestParser::ParseSentenceNumLine(const std::string &line, |
| std::size_t &sentNum) |
| { |
| const util::AnyCharacter delimiter(" \t"); |
| util::TokenIter<util::AnyCharacter, true> p(line, delimiter); |
| if (*p != "sentence") { |
| |
| throw Exception(""); |
| } |
| ++p; |
| std::string tmp; |
| p->CopyToString(&tmp); |
| sentNum = std::atoi(tmp.c_str()); |
| } |
|
|
| void ForestParser::ParseHyperedgeLine(const std::string &line, Forest &forest) |
| { |
| const util::AnyCharacter delimiter(" \t"); |
| util::TokenIter<util::AnyCharacter, true> p(line, delimiter); |
| VertexSP v = AddVertex(ParseVertex(*p)); |
| HyperedgeSP e = boost::make_shared<Forest::Hyperedge>(); |
| e->head = v.get(); |
| ++p; |
| if (*p != "=>") { |
| |
| throw Exception(""); |
| } |
| for (++p; *p != "|||"; ++p) { |
| v = ParseVertex(*p); |
| if (v->start == -1) { |
| |
| v->start = v->end = e->head->start; |
| } |
| e->tail.push_back(AddVertex(v).get()); |
| } |
| ++p; |
| std::string tmp; |
| p->CopyToString(&tmp); |
| e->weight = std::atof(tmp.c_str()); |
| e->head->incoming.push_back(e); |
| } |
|
|
| boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex( |
| const StringPiece &s) |
| { |
| VertexSP v = boost::make_shared<Forest::Vertex>(); |
| std::size_t pos = s.rfind('['); |
| if (pos == std::string::npos) { |
| s.CopyToString(&v->symbol.value); |
| v->symbol.isNonTerminal = false; |
| v->start = v->end = -1; |
| return v; |
| } |
| if (pos > 2 && s[pos-2] == '^' && s[pos-1] == 'g') { |
| s.substr(0, pos-2).CopyToString(&v->symbol.value); |
| } else { |
| s.substr(0, pos).CopyToString(&v->symbol.value); |
| } |
| v->symbol.isNonTerminal = true; |
| std::size_t begin = pos + 1; |
| pos = s.find(',', begin+1); |
| std::string tmp; |
| s.substr(begin, pos-begin).CopyToString(&tmp); |
| v->start = std::atoi(tmp.c_str()); |
| s.substr(pos+1, s.size()-pos-2).CopyToString(&tmp); |
| v->end = std::atoi(tmp.c_str()); |
| return v; |
| } |
|
|
| bool operator==(const ForestParser &lhs, const ForestParser &rhs) |
| { |
| |
| return lhs.m_input == rhs.m_input; |
| } |
|
|
| bool operator!=(const ForestParser &lhs, const ForestParser &rhs) |
| { |
| return !(lhs == rhs); |
| } |
|
|
| } |
| } |
| } |
|
|