|
#pragma once |
|
|
|
#include <vector> |
|
#include <cstdlib> |
|
#include "util/string_piece.hh" |
|
#include "util/tokenize_piece.hh" |
|
#include "util/file_piece.hh" |
|
#include "util/string_piece.hh" |
|
#include "util/tokenize_piece.hh" |
|
|
|
namespace probingpt |
|
{ |
|
|
|
|
|
struct line_text { |
|
StringPiece source_phrase; |
|
StringPiece target_phrase; |
|
StringPiece prob; |
|
StringPiece word_align; |
|
StringPiece counts; |
|
StringPiece sparse_score; |
|
StringPiece property; |
|
std::string property_to_be_binarized; |
|
}; |
|
|
|
|
|
struct target_text { |
|
std::vector<unsigned int> target_phrase; |
|
std::vector<float> prob; |
|
std::vector<size_t> word_align_term; |
|
std::vector<size_t> word_align_non_term; |
|
std::vector<char> counts; |
|
std::vector<char> sparse_score; |
|
std::vector<char> property; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}; |
|
|
|
|
|
line_text splitLine(const StringPiece &textin, bool scfg); |
|
void reformatSCFG(line_text &output); |
|
|
|
std::vector<unsigned char> splitWordAll1(const StringPiece &textin); |
|
|
|
} |
|
|
|
|