#include "output.hh"

#include "../common/model_buffer.hh"
#include "../common/print.hh"
#include "../../util/file_stream.hh"
#include "../../util/stream/multi_stream.hh"

#include <iostream>

namespace lm { namespace builder {

OutputHook::~OutputHook() {}

Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
  : buffer_(file_base, keep_buffer, output_q) {}

void Output::SinkProbs(util::stream::Chains &chains) {
  // Feed the probability streams to hooks that can consume them in parallel.
  Apply(PROB_PARALLEL_HOOK, chains);
  if (!buffer_.Keep() && !Have(PROB_SEQUENTIAL_HOOK)) {
    // No further consumers: drain the chains and finish.
    chains >> util::stream::kRecycle;
    chains.Wait(true);
    return;
  }
  // Sink the streams into the intermediate model buffer so they can be
  // replayed for sequential consumers (and kept on disk if requested).
  buffer_.Sink(chains, header_.counts_pruned);
  chains >> util::stream::kRecycle;
  chains.Wait(false);
  if (Have(PROB_SEQUENTIAL_HOOK)) {
    std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
    // Replay the buffered model through the sequential hooks (ARPA writing).
    buffer_.Source(chains);
    Apply(PROB_SEQUENTIAL_HOOK, chains);
    chains >> util::stream::kRecycle;
    chains.Wait(true);
  }
}

// Pass the chains to every hook registered under hook_type.
void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
  for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
    entry->Sink(header_, VocabFile(), chains);
  }
}

void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
  if (verbose_header_) {
    // Write a short informational header ahead of the model itself.
    util::FileStream out(file_.get(), 50);
    out << "# Input file: " << info.input_file << '\n';
    out << "# Token count: " << info.token_count << '\n';
    out << "# Smoothing: Modified Kneser-Ney" << '\n';
  }
  // Print the model in ARPA format.
  chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
}

}} // namespaces