#include "vocab.hh"

// NOTE(review): the angle-bracket header names were lost in the copy under
// review.  The three below are the standard headers this file visibly relies
// on (isspace, std::istream, std::string) -- confirm against the original.
#include <ctype.h>
#include <istream>
#include <string>

namespace lm {
namespace vocab {

// Inserts every whitespace-delimited token read from `in` into `out`.
// The stream's exception mask is set to badbit before reading, and is left
// that way when the function returns.
void ReadSingle(std::istream &in, boost::unordered_set<std::string> &out) {
  in.exceptions(std::istream::badbit);
  for (std::string token; in >> token;) {
    out.insert(token);
  }
}

namespace {

// Consumes the whitespace that follows a word.
// Returns true if a '\n' was consumed or the stream is no longer good after
// a read (for example at end of input).  Otherwise the first non-whitespace
// character is put back and false is returned.
bool IsLineEnd(std::istream &in) {
  for (;;) {
    const int ch = in.get();
    if (!in || ch == '\n') return true;
    if (!isspace(ch)) break;
  }
  in.unget();
  return false;
}

} // namespace

// Reads newline-separated lines of whitespace-separated words.  For every
// word, out[word] receives the zero-based id of each line the word appears
// on; an id is appended only when it differs from the last id already stored
// for that word.  Input is consumed word by word rather than line by line
// because lines can be very long.
//
// Line ids advance only when a line end is seen after a word, so with the
// stream's default whitespace skipping, lines containing no words do not
// consume an id.
//
// Returns the number of line ends counted, plus one if the last word read was
// not followed by a line end.  The stream's exception mask is set to badbit.
unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
  in.exceptions(std::istream::badbit);
  unsigned int line = 0;
  bool line_has_word = false;
  for (std::string token; in >> token;) {
    std::vector<unsigned int> &lines = out[token];
    const bool already_recorded = !lines.empty() && lines.back() == line;
    if (!already_recorded) lines.push_back(line);
    if (IsLineEnd(in)) {
      ++line;
      line_has_word = false;
    } else {
      line_has_word = true;
    }
  }
  return line + (line_has_word ? 1u : 0u);
}

} // namespace vocab
} // namespace lm