| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifndef moses_PhraseDictionaryMultiModelCounts_h |
| | #define moses_PhraseDictionaryMultiModelCounts_h |
| |
|
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"

#include <cstddef>
#include <exception>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <boost/unordered_map.hpp>

#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include "moses/Util.h"
| |
|
| | namespace Moses |
| | { |
| |
|
| | typedef boost::unordered_map<Word, double > lexicalMap; |
| | typedef boost::unordered_map<Word, lexicalMap > lexicalMapJoint; |
| | typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair; |
| | typedef std::vector<std::vector<lexicalPair> > lexicalCache; |
| |
|
| | struct multiModelCountsStats : multiModelStats { |
| | std::vector<float> fst, ft; |
| | }; |
| |
|
| | struct multiModelCountsStatsOptimization: multiModelCountsStats { |
| | std::vector<float> fs; |
| | lexicalCache lexCachee2f, lexCachef2e; |
| | size_t f; |
| | }; |
| |
|
| | struct lexicalTable { |
| | lexicalMapJoint joint; |
| | lexicalMap marginal; |
| | }; |
| |
|
// Declarations of the two count-combination functions; their definitions are
// not part of this header. Both have exactly the signature of the function
// pointer stored in PhraseDictionaryMultiModelCounts::m_combineFunction:
// three non-const references to float vectors in, one double out.
// NOTE(review): the three vectors are presumably indexed by component model
// and expected to have equal length -- confirm in the definitions.
double InstanceWeighting(std::vector<float> &joint_counts,
                         std::vector<float> &marginals,
                         std::vector<float> &multimodelweights);

double LinearInterpolationFromCounts(std::vector<float> &joint_counts,
                                     std::vector<float> &marginals,
                                     std::vector<float> &multimodelweights);
| |
|
| |
|
| | |
// Exception type derived from std::runtime_error. It is default-constructible
// only, and what() always returns the fixed text "AlignmentException".
// std::runtime_error is declared in <stdexcept> (not <exception>), so the
// including file must pull in <stdexcept>.
class AlignmentException : public std::runtime_error
{
public:
  AlignmentException() : std::runtime_error("AlignmentException") { }
};
| |
|
| |
|
| | |
| | |
| | class PhraseDictionaryMultiModelCounts: public PhraseDictionaryMultiModel |
| | { |
| |
|
| | #ifdef WITH_DLIB |
| | friend class CrossEntropyCounts; |
| | #endif |
| |
|
| | typedef std::vector< std::set<size_t> > AlignVector; |
| |
|
| |
|
| | public: |
| | PhraseDictionaryMultiModelCounts(const std::string &line); |
| | ~PhraseDictionaryMultiModelCounts(); |
| | void Load(AllOptions::ptr const& opts); |
| | TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats, std::vector<std::vector<float> > &multimodelweights) const; |
| | void CollectSufficientStats(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats) const; |
| | float GetTargetCount(const Phrase& target, size_t modelIndex) const; |
| | double GetLexicalProbability( Word &inner, Word &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const; |
| | double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, bool is_input ) const; |
| | double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const; |
| | std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const; |
| | std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input ); |
| | void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const; |
| | void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const; |
| | void LoadLexicalTable( std::string &fileName, lexicalTable* ltable); |
| | TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const; |
| | #ifdef WITH_DLIB |
| | std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector); |
| | #endif |
| | |
| | virtual void InitializeForInput(ttasksptr const& ttask) { |
| | |
| | } |
| |
|
| | void SetParameter(const std::string& key, const std::string& value); |
| |
|
| | private: |
| | std::vector<PhraseDictionary*> m_inverse_pd; |
| | std::vector<lexicalTable*> m_lexTable_e2f, m_lexTable_f2e; |
| | double (*m_combineFunction) (std::vector<float> &joint_counts, std::vector<float> &marginals, std::vector<float> &multimodelweights); |
| |
|
| | std::vector<std::string> m_lexE2FStr, m_lexF2EStr, m_targetTable; |
| |
|
| | }; |
| |
|
#ifdef WITH_DLIB
// Function object derived from OptimizationObjective. It is compiled only
// when WITH_DLIB is defined; operator() is declared here and defined elsewhere.
class CrossEntropyCounts: public OptimizationObjective
{
public:

  // Stores a copy of the caller's vector of pointers (the pointed-to stats
  // objects are not copied), the model pointer, and the feature index.
  // Members are initialized in their declaration order.
  // NOTE(review): the pointees must stay alive for as long as this object is
  // used; who owns them is not visible in this header.
  CrossEntropyCounts (
    std::vector<multiModelCountsStatsOptimization*> &optimizerStats,
    PhraseDictionaryMultiModelCounts * model,
    size_t iFeature
  )
    : m_optimizerStats(optimizerStats)
    , m_model(model)
    , m_iFeature(iFeature)
  {
  }

  // Takes a dlib matrix of doubles declared with 0 rows and 1 column (in dlib
  // a 0 dimension means "sized at run time") and returns a double.
  double operator() ( const dlib::matrix<double,0,1>& arg) const;

private:
  std::vector<multiModelCountsStatsOptimization*> m_optimizerStats;
  PhraseDictionaryMultiModelCounts * m_model;
  size_t m_iFeature;
};
#endif
| |
|
| | } |
| |
|
| | #endif |
| |
|