|
#ifndef LM_BUILDER_PIPELINE_H |
|
#define LM_BUILDER_PIPELINE_H |
|
|
|
#include "lm/builder/adjust_counts.hh"
#include "lm/builder/initial_probabilities.hh"
#include "lm/builder/header_info.hh"
#include "lm/lm_exception.hh"
#include "lm/word_index.hh"
#include "util/stream/config.hh"
#include "util/file_piece.hh"

#include <cstddef>
#include <string>
#include <vector>

#include <stdint.h>
|
|
|
namespace lm { namespace builder { |
|
|
|
// Forward declaration only: this header refers to Output solely by reference
// (see the Pipeline() parameter list), so the full definition is not needed here.
class Output;
|
|
|
struct PipelineConfig { |
|
std::size_t order; |
|
std::string vocab_file; |
|
util::stream::SortConfig sort; |
|
InitialProbabilitiesConfig initial_probs; |
|
util::stream::ChainConfig read_backoffs; |
|
|
|
|
|
|
|
lm::WordIndex vocab_estimate; |
|
|
|
|
|
std::size_t minimum_block; |
|
|
|
|
|
std::size_t block_count; |
|
|
|
|
|
|
|
std::vector<uint64_t> prune_thresholds; |
|
bool prune_vocab; |
|
std::string prune_vocab_file; |
|
|
|
|
|
DiscountConfig discount; |
|
|
|
|
|
bool output_q; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uint64_t vocab_size_for_unk; |
|
|
|
|
|
|
|
|
|
|
|
WarningAction disallowed_symbol_action; |
|
|
|
const std::string &TempPrefix() const { return sort.temp_prefix; } |
|
std::size_t TotalMemory() const { return sort.total_memory; } |
|
}; |
|
|
|
|
|
// Declaration only; the definition lives in another translation unit.
//
//   config    - pipeline settings.  Taken by non-const reference, so the
//               callee is permitted to modify it.
//   text_file - integer handle for the input.
//               NOTE(review): presumably an open file descriptor; whether
//               Pipeline() takes ownership of it (closes it) cannot be told
//               from this header -- confirm against the definition.
//   output    - destination object the results are delivered through.
void Pipeline(PipelineConfig &config, int text_file, Output &output);
|
|
|
}} |
|
#endif |
|
|