#include "split_worker.hh"
#include "../common/ngram.hh"

namespace lm {
namespace interpolate {

/**
 * Stores the n-gram order and attaches this worker's two output streams
 * to the supplied chains.
 *
 * @param order         n-gram order; passed to NGram when decoding each
 *                      input record in Run().
 * @param backoff_chain chain that backoff_input_ is connected to.
 * @param sort_chain    chain that sort_input_ is connected to.
 */
SplitWorker::SplitWorker(std::size_t order, util::stream::Chain &backoff_chain,
                         util::stream::Chain &sort_chain)
    : order_(order) {
  backoff_chain >> backoff_input_;
  sort_chain >> sort_input_;
}

/**
 * Splits every incoming n-gram record into two output records.
 *
 * Input:  n-gram record (word ids, prob, and backoff).
 * Output: a float (the backoff) to the backoff_input_ stream;
 *         the word ids followed by a float (the prob) to the
 *         sort_input_ stream.
 *
 * Both output streams are poisoned once the input has been consumed.
 *
 * @param position position in the chain from which input records are read.
 */
void SplitWorker::Run(const util::stream::ChainPosition &position) {
  for (util::stream::Stream stream(position); stream; ++stream) {
    // NOTE(review): the payload template argument was missing in the
    // reviewed text; ProbBackoff is assumed because the record is read
    // through .prob and .backoff -- confirm against ../common/ngram.hh.
    NGram<ProbBackoff> ngram(stream.Get(), order_);

    // Sort stream record layout: word ids first, then the probability.
    const float prob = ngram.Value().prob;
    lm::WordIndex *out = reinterpret_cast<lm::WordIndex *>(sort_input_.Get());
    for (const lm::WordIndex *word = ngram.begin(); word != ngram.end();
         ++word) {
      *out++ = *word;
    }
    // `out` now points just past the last copied word id; the probability
    // is stored there.
    *reinterpret_cast<float *>(out) = prob;
    ++sort_input_;

    // Backoff stream record: a single float.
    const float backoff = ngram.Value().backoff;
    *reinterpret_cast<float *>(backoff_input_.Get()) = backoff;
    ++backoff_input_;
  }

  // Input exhausted: poison both outputs (presumably the end-of-stream
  // signal for downstream consumers -- confirm against util/stream).
  sort_input_.Poison();
  backoff_input_.Poison();
}

}  // namespace interpolate
}  // namespace lm