|
#ifndef LM_MODEL_H |
|
#define LM_MODEL_H |
|
|
|
#include "lm/bhiksha.hh" |
|
#include "lm/binary_format.hh" |
|
#include "lm/config.hh" |
|
#include "lm/facade.hh" |
|
#include "lm/quantize.hh" |
|
#include "lm/search_hashed.hh" |
|
#include "lm/search_trie.hh" |
|
#include "lm/state.hh" |
|
#include "lm/value.hh" |
|
#include "lm/vocab.hh" |
|
#include "lm/weights.hh" |
|
|
|
#include "util/murmur_hash.hh" |
|
|
|
#include <algorithm> |
|
#include <vector> |
|
#include <cstring> |
|
|
|
// Forward declaration only: this header names util::FilePiece without needing
// its full definition.
namespace util { class FilePiece; }
|
|
|
namespace lm { |
|
namespace ngram { |
|
namespace detail { |
|
|
|
|
|
|
|
/**
 * N-gram language model parameterized on the lookup structure (Search) and
 * the vocabulary implementation (VocabularyT).
 *
 * The class derives from base::ModelFacade instantiated with itself as the
 * first template argument (curiously recurring template pattern), with State
 * as the state type and VocabularyT as the vocabulary type.
 *
 * Apart from UnRest, every member function is only declared here; the
 * definitions are expected in the corresponding implementation file.
 */
template <class Search, class VocabularyT>
class GenericModel : public base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> {
  private:
    // Shorthand for the facade base class.
    typedef base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> P;

  public:
    // Model type tag for this instantiation; defined out of line.
    // NOTE(review): presumably the tag used to recognize a matching binary
    // file -- confirm against lm/binary_format.hh.
    static const ModelType kModelType;

    // Version number, taken directly from the Search policy.
    static const unsigned int kVersion = Search::kVersion;

    /**
     * Compute a size from per-order n-gram counts and a configuration.
     *
     * NOTE(review): presumably the number of bytes of memory the model's data
     * structures need for these counts -- confirm against the definition.
     *
     * @param counts  n-gram counts (assumed to be indexed by order - 1; verify).
     * @param config  configuration; a default-constructed Config if omitted.
     */
    static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());

    /**
     * Construct a model from the file named by `file`.
     *
     * NOTE(review): the private InitializeFromARPA helper and the
     * BinaryFormat member suggest both ARPA text and binary files are
     * accepted -- confirm against the definition.
     *
     * @param file    path of the model file.
     * @param config  configuration; a default-constructed Config if omitted.
     */
    explicit GenericModel(const char *file, const Config &config = Config());

    /**
     * Score `new_word` in the context described by `in_state` and write the
     * resulting state to `out_state`.
     *
     * NOTE(review): presumably in_state and out_state must refer to distinct
     * objects -- confirm against the definition before aliasing them.
     */
    FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const;

    /**
     * Score `new_word` given an explicit context instead of a State.
     *
     * The context is passed as the half-open range
     * [context_rbegin, context_rend).
     * NOTE(review): the "r" naming suggests the words are stored in reverse
     * order (most recent first) -- confirm against the definition.
     */
    FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;

    /**
     * Fill `out_state` with the state corresponding to the context
     * [context_rbegin, context_rend).
     *
     * NOTE(review): same reverse-order assumption as FullScoreForgotState --
     * confirm against the definition.
     */
    void GetState(const WordIndex *context_rbegin, const WordIndex *context_rend, State &out_state) const;

    /**
     * Continue scoring from a previously located n-gram by adding words on
     * the left.
     *
     * NOTE(review): the parameter descriptions below are inferred from the
     * names; verify each against the definition.
     *
     * @param add_rbegin,add_rend  words to add to the left of the n-gram
     *                             (presumably in reverse order).
     * @param backoff_in           backoff weights from the previous call.
     * @param extend_pointer       handle identifying the n-gram being extended.
     * @param extend_length        length of the n-gram `extend_pointer` refers to.
     * @param backoff_out          output array that receives backoff weights.
     * @param next_use             output; how much of the added context a
     *                             subsequent call should consider.
     */
    FullScoreReturn ExtendLeft(
        const WordIndex *add_rbegin, const WordIndex *add_rend,
        const float *backoff_in,
        uint64_t extend_pointer,
        unsigned char extend_length,
        float *backoff_out,
        unsigned char &next_use) const;

    /**
     * Rest-cost correction for the n-grams identified by
     * [pointers_begin, pointers_end).
     *
     * When the Search policy does not keep separate rest costs
     * (Search::kDifferentRest is false) this is always zero; otherwise the
     * work is delegated to InternalUnRest.
     */
    float UnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const {
      // Float literal keeps both arms of the conditional the same type, so no
      // float -> double -> float round trip happens on return.
      return Search::kDifferentRest ? InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0f;
    }

  private:
    // NOTE(review): by its name, scores new_word in the given context without
    // applying backoff charges -- confirm against the definition.
    FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const;

    // NOTE(review): by its name, resumes scoring from `node` at the given
    // order (stored as order minus 2), updating `ret`, `backoff_out` and
    // `next_use` -- confirm against the definition.
    void ResumeScore(const WordIndex *context_rbegin, const WordIndex *const context_rend, unsigned char starting_order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const;

    // NOTE(review): presumably lays the model's data structures out in the
    // memory beginning at `start` -- confirm against the definition.
    void SetupMemory(void *start, const std::vector<uint64_t> &counts, const Config &config);

    // NOTE(review): presumably builds the model from ARPA text read from `fd`;
    // `file` is the corresponding file name -- confirm against the definition.
    void InitializeFromARPA(int fd, const char *file, const Config &config);

    // Out-of-line implementation behind UnRest; only called when
    // Search::kDifferentRest is true.
    float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const;

    // NOTE: member order is also construction order; do not rearrange.
    BinaryFormat backing_;

    VocabularyT vocab_;

    Search search_;
};
|
|
|
} |
|
|
|
|
|
|
|
// LM_COMMA() expands to a single comma. It lets a template argument list that
// contains commas be passed as ONE macro argument: a literal comma would be
// treated by the preprocessor as an argument separator.
#define LM_COMMA() ,

// LM_NAME_MODEL(name, from) defines a class `name` that publicly derives from
// `from` and provides a (file, config) constructor forwarding to `from`'s
// constructor; `config` defaults to a default-constructed Config.
// The expansion already ends in ';', so a trailing ';' at the call site is a
// harmless empty declaration.
// Each continuation backslash must be the very last character on its line.
#define LM_NAME_MODEL(name, from)\
class name : public from {\
  public:\
    name(const char *file, const Config &config = Config()) : from(file, config) {}\
};
|
|
|
// Named model classes. Each is a GenericModel instantiation wrapped in its own
// class by LM_NAME_MODEL; LM_COMMA() stands in for the commas inside the
// template argument lists.

// HashedSearch over BackoffValue with ProbingVocabulary.
LM_NAME_MODEL(ProbingModel, detail::GenericModel<detail::HashedSearch<BackoffValue> LM_COMMA() ProbingVocabulary>);

// HashedSearch over RestValue with ProbingVocabulary.
LM_NAME_MODEL(RestProbingModel, detail::GenericModel<detail::HashedSearch<RestValue> LM_COMMA() ProbingVocabulary>);

// TrieSearch with DontQuantize and DontBhiksha, using SortedVocabulary.
LM_NAME_MODEL(TrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);

// TrieSearch with DontQuantize and ArrayBhiksha, using SortedVocabulary.
LM_NAME_MODEL(ArrayTrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);

// TrieSearch with SeparatelyQuantize and DontBhiksha, using SortedVocabulary.
LM_NAME_MODEL(QuantTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);

// TrieSearch with SeparatelyQuantize and ArrayBhiksha, using SortedVocabulary.
LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
|
|
|
|
|
typedef ::lm::ngram::ProbingVocabulary Vocabulary; |
|
typedef ProbingModel Model; |
|
|
|
|
|
|
|
|
|
/**
 * Load the model in `file_name` and return it through the base::Model
 * interface rather than as a concrete model class.
 *
 * NOTE(review): `if_arpa` presumably selects which model type to build when
 * the file is ARPA text (default PROBING), while a binary file determines its
 * own type -- confirm against the definition.
 * NOTE(review): the raw pointer return suggests the caller owns the result and
 * must delete it -- confirm against the definition.
 *
 * @param file_name  path of the model file.
 * @param config     configuration; a default-constructed Config if omitted.
 * @param if_arpa    model type hint, PROBING by default.
 */
base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING);
|
|
|
} |
|
} |
|
|
|
#endif |
|
|