File size: 1,625 Bytes
1ce325b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
#include "output.hh"
#include "../common/model_buffer.hh"
#include "../common/print.hh"
#include "../../util/file_stream.hh"
#include "../../util/stream/multi_stream.hh"
#include <iostream>
namespace lm { namespace builder {
// Out-of-line definition of the OutputHook destructor. It performs no work of
// its own; members (if any) are torn down by their own destructors.
// NOTE(review): presumably declared virtual in output.hh since hooks are used
// through OutputHook in a ptr_vector -- confirm against the header.
OutputHook::~OutputHook() {
}
// Constructs an Output whose model buffer is initialised from the caller's
// arguments; all three parameters are forwarded unchanged to buffer_.
//   file_base   -- passed through as the buffer's first argument.
//   keep_buffer -- passed through; later queried via buffer_.Keep().
//   output_q    -- passed through as the buffer's third argument.
Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
    : buffer_(file_base, keep_buffer, output_q) {
}
// Terminates the probability chains, routing them to every requested output.
//
// Steps, in order:
//   1. Attach all PROB_PARALLEL_HOOK hooks to `chains`.
//   2. If the buffer is not kept and no PROB_SEQUENTIAL_HOOK exists, recycle
//      the chains, wait on them and return.
//   3. Otherwise sink the chains into buffer_, recycle and wait.
//   4. If sequential hooks exist, announce the ARPA step on stderr, re-source
//      the chains from buffer_, attach the sequential hooks, recycle and wait.
//
// NOTE(review): the bool passed to Chains::Wait presumably selects whether the
// chain memory is released afterwards (false keeps it for the re-source in
// step 4) -- confirm against util/stream/multi_stream.hh.
void Output::SinkProbs(util::stream::Chains &chains) {
  Apply(PROB_PARALLEL_HOOK, chains);

  // Same evaluation order as the negated && form: Have() is only consulted
  // when buffer_.Keep() is false.
  const bool route_through_buffer = buffer_.Keep() || Have(PROB_SEQUENTIAL_HOOK);
  if (!route_through_buffer) {
    // Nothing else consumes the data, so just drain the chains.
    chains >> util::stream::kRecycle;
    chains.Wait(true);
    return;
  }

  // Capture the stream contents in the model buffer before draining.
  buffer_.Sink(chains, header_.counts_pruned);
  chains >> util::stream::kRecycle;
  chains.Wait(false);

  // Without sequential hooks the buffered copy is all that was asked for.
  if (!Have(PROB_SEQUENTIAL_HOOK)) {
    return;
  }

  std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
  // Replay the buffered data through the chains for the sequential hooks.
  buffer_.Source(chains);
  Apply(PROB_SEQUENTIAL_HOOK, chains);
  chains >> util::stream::kRecycle;
  chains.Wait(true);
}
// Invokes Sink(header_, VocabFile(), chains) on every hook stored under
// `hook_type`, visiting them in container order.
//   hook_type -- index into outputs_ selecting which group of hooks to run.
//   chains    -- the stream chains each hook attaches itself to.
void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
  typedef boost::ptr_vector<OutputHook> HookList;
  HookList &hooks = outputs_[hook_type];
  for (HookList::iterator hook = hooks.begin(); hook != hooks.end(); ++hook) {
    hook->Sink(header_, VocabFile(), chains);
  }
}
void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
if (verbose_header_) {
util::FileStream out(file_.get(), 50);
out << "# Input file: " << info.input_file << '\n';
out << "# Token count: " << info.token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
}
}} // namespaces
|