File size: 1,507 Bytes
d5ee97c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
#ifndef PHONEMIZER_H
#define PHONEMIZER_H
#include "tfg2p.h"
#include <tuple>
#include <set>
#include <algorithm>
// Pairs an integer ID with a string.
// Phonemizer keeps vectors of these (CharId, PhnId) and GetDelimitedFile
// returns one; presumably each element is one "TOKEN \t ID" row of
// char2id.txt / phn2id.txt -- confirm against the loader.
struct IdStr{
    // Numeric ID. Zero-initialized so a default-constructed IdStr never
    // carries an indeterminate value.
    int32_t ID = 0;
    // String associated with the ID.
    std::string STR;
};
// A pair of strings: a word and its phoneme string.
// Phonemizer stores its Dictionary as a vector of these.
struct StrStr
{
    std::string Word;   // the word (presumably the dictionary lookup key -- confirm)
    std::string Phn;    // phoneme string for Word
};
/*
 * Turns words into phoneme strings.
 * Holds a TFG2P model, two ID <-> string tables and a phonetic dictionary.
 */
class Phonemizer
{
public:
    Phonemizer();

    /*
     * Initialize a phonemizer.
     * Expects:
     * - Two files made of TOKEN \t ID lines:
     * -- char2id.txt: translation from input character to the ID the model can accept
     * -- phn2id.txt: translation from the ID the model outputs to a phoneme
     * - A model/ folder where a G2P-Tensorflow model was saved as SavedModel
     * - dict.txt: phonetic dictionary. A word is searched there first; if it
     *   can't be found, the model is used.
     *
     * InPath: presumably the folder containing the files above -- confirm
     *         against the definition.
     * Returns a bool (presumably true on success -- confirm).
     */
    bool Initialize(const std::string InPath);

    /*
     * Produce the phoneme string for a single word.
     * InWord:      the word to convert.
     * Temperature: defaults to 0.1f when omitted; its exact effect is decided
     *              by the definition (presumably forwarded to the model).
     */
    std::string ProcessWord(const std::string& InWord, float Temperature = 0.1f);

    // Accessors for the phoneme language string (PhnLanguage).
    std::string GetPhnLanguage() const;
    void SetPhnLanguage(const std::string &value);

    // Presumably returns the grapheme characters known to the phonemizer -- confirm.
    std::string GetGraphemeChars();

private:
    // --- helpers ---

    // Reads InFname and returns its contents as IdStr entries.
    std::vector<IdStr> GetDelimitedFile(const std::string& InFname);

    // Loads the phonetic dictionary from InDictFn (presumably into Dictionary).
    void LoadDictionary(const std::string& InDictFn);

    // Looks up InWord (presumably in Dictionary; the result for a missing
    // word is defined by the implementation).
    std::string DictLookup(const std::string& InWord);

    // --- state (declaration order kept: it fixes construction order) ---

    TFG2P G2pModel;

    std::vector<IdStr> CharId;
    std::vector<IdStr> PhnId;

    // Deliberately a plain vector: the original author reports that set,
    // unordered_map and other container types could not be used here.
    std::vector<StrStr> Dictionary;

    std::string PhnLanguage;
};
// Less-than comparison between two StrStr entries (declared here, defined elsewhere).
// NOTE(review): in `a < b` the first parameter receives `a`, yet it is named
// `right` and the second `left` -- confirm the definition compares them in
// the intended order.
bool operator<(const StrStr& right,const StrStr& left);
#endif // PHONEMIZER_H
|