| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | #include <string> |
| | #include <fstream> |
| | #include "OnDiskWrapper.h" |
| | #include "Vocab.h" |
| | #include "moses/Util.h" |
| | #include "util/exception.hh" |
| |
|
| | using namespace std; |
| |
|
| | namespace OnDiskPt |
| | { |
| |
|
| | bool Vocab::Load(OnDiskWrapper &onDiskWrapper) |
| | { |
| | fstream &file = onDiskWrapper.GetFileVocab(); |
| |
|
| | string line; |
| | while(getline(file, line)) { |
| | vector<string> tokens; |
| | Moses::Tokenize(tokens, line); |
| | UTIL_THROW_IF2(tokens.size() != 2, "Vocab file corrupted"); |
| | const string &key = tokens[0]; |
| | m_vocabColl[key] = Moses::Scan<uint64_t>(tokens[1]); |
| | } |
| |
|
| | |
| | |
| | m_lookup.resize(m_vocabColl.size() + 1); |
| | m_nextId = m_lookup.size(); |
| |
|
| | CollType::const_iterator iter; |
| | for (iter = m_vocabColl.begin(); iter != m_vocabColl.end(); ++iter) { |
| | uint32_t vocabId = iter->second; |
| | const std::string &word = iter->first; |
| |
|
| | m_lookup[vocabId] = word; |
| | } |
| |
|
| | return true; |
| | } |
| |
|
| | void Vocab::Save(OnDiskWrapper &onDiskWrapper) |
| | { |
| | fstream &file = onDiskWrapper.GetFileVocab(); |
| | CollType::const_iterator iterVocab; |
| | for (iterVocab = m_vocabColl.begin(); iterVocab != m_vocabColl.end(); ++iterVocab) { |
| | const string &word = iterVocab->first; |
| | uint32_t vocabId = iterVocab->second; |
| |
|
| | file << word << " " << vocabId << endl; |
| | } |
| | } |
| |
|
| | uint64_t Vocab::AddVocabId(const std::string &str) |
| | { |
| | |
| | CollType::const_iterator iter = m_vocabColl.find(str); |
| | if (iter == m_vocabColl.end()) { |
| | |
| | m_vocabColl[str] = m_nextId; |
| | return m_nextId++; |
| | } else { |
| | |
| | return iter->second; |
| | } |
| | } |
| |
|
| | uint64_t Vocab::GetVocabId(const std::string &str, bool &found) const |
| | { |
| | |
| | CollType::const_iterator iter = m_vocabColl.find(str); |
| | if (iter == m_vocabColl.end()) { |
| | found = false; |
| | return 0; |
| | } else { |
| | |
| | found = true; |
| | return iter->second; |
| | } |
| | } |
| |
|
| | } |
| |
|