|
#ifndef LM_COMMON_PRINT_H |
|
#define LM_COMMON_PRINT_H |
|
|
|
#include "../word_index.hh" |
|
#include "../../util/mmap.hh" |
|
#include "../../util/string_piece.hh" |
|
|
|
#include <cassert> |
|
#include <vector> |
|
|
|
namespace util { namespace stream { class ChainPositions; }} |
|
|
|
|
|
|
|
|
|
|
|
namespace lm { |
|
|
|
class VocabReconstitute { |
|
public: |
|
|
|
explicit VocabReconstitute(int fd); |
|
|
|
const char *Lookup(WordIndex index) const { |
|
assert(index < map_.size() - 1); |
|
return map_[index]; |
|
} |
|
|
|
StringPiece LookupPiece(WordIndex index) const { |
|
return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]); |
|
} |
|
|
|
std::size_t Size() const { |
|
|
|
return map_.size() - 1; |
|
} |
|
|
|
private: |
|
util::scoped_memory memory_; |
|
std::vector<const char*> map_; |
|
}; |
|
|
|
class PrintARPA { |
|
public: |
|
|
|
explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts) |
|
: vocab_fd_(vocab_fd), out_fd_(out_fd), counts_(counts) {} |
|
|
|
void Run(const util::stream::ChainPositions &positions); |
|
|
|
private: |
|
int vocab_fd_; |
|
int out_fd_; |
|
std::vector<uint64_t> counts_; |
|
}; |
|
|
|
} |
|
#endif |
|
|