File size: 1,507 Bytes
d5ee97c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#ifndef PHONEMIZER_H
#define PHONEMIZER_H
#include "tfg2p.h"
#include <tuple>
#include <set>
#include <algorithm>

/// Associates a numeric ID with a string token.
/// Element type of Phonemizer::CharId and Phonemizer::PhnId
/// (presumably one "TOKEN \t ID" line of char2id.txt / phn2id.txt each --
/// confirm against GetDelimitedFile).
struct IdStr
{
    int32_t ID;       ///< Numeric identifier.
    std::string STR;  ///< Text token paired with the identifier.
};


/// Pair of strings; element type of Phonemizer::Dictionary.
struct StrStr
{
    std::string Word;  ///< The word (presumably the dictionary lookup key -- confirm).
    std::string Phn;   ///< Presumably the phoneme string stored for Word -- confirm.
};


/**
 * Turns words into phoneme strings.
 *
 * A word is first searched in a phonetic dictionary; if it cannot be found
 * there, the G2P model is used instead.
 */
class Phonemizer
{
public:
    Phonemizer();

    /**
     * Initialize a phonemizer.
     *
     * Expects:
     * - Two files, each made of "TOKEN \t ID" entries:
     *   - char2id.txt: maps an input character to the ID the model accepts
     *   - phn2id.txt: maps an output ID produced by the model to a phoneme
     * - A model/ folder in which a G2P-Tensorflow model was saved as SavedModel
     * - dict.txt: the phonetic dictionary. Words are searched there first and
     *   the model is only used when a word cannot be found.
     *
     * NOTE(review): InPath is presumably the directory holding the items
     * above, and the return value presumably reports success -- confirm
     * against the implementation.
     */
    bool Initialize(const std::string InPath);

    /**
     * Produce the phoneme string for InWord.
     *
     * NOTE(review): Temperature (default 0.1f) is presumably forwarded to the
     * G2P model -- confirm against the implementation.
     */
    std::string ProcessWord(const std::string& InWord, float Temperature = 0.1f);

    /// Accessors for the phoneme-language string.
    std::string GetPhnLanguage() const;
    void SetPhnLanguage(const std::string &value);

    /// NOTE(review): presumably the input characters listed in CharId -- confirm.
    std::string GetGraphemeChars();

private:
    TFG2P G2pModel;

    // ID tables with IdStr entries (presumably filled from char2id.txt and
    // phn2id.txt respectively -- confirm).
    std::vector<IdStr> CharId;
    std::vector<IdStr> PhnId;

    /// Reads InFname and returns its contents as IdStr entries.
    std::vector<IdStr> GetDelimitedFile(const std::string& InFname);

    // Deliberately a plain vector: the original author reports having tried
    // set, unordered_map and other types without success.
    std::vector<StrStr> Dictionary;

    /// Loads the phonetic dictionary from InDictFn.
    void LoadDictionary(const std::string& InDictFn);

    /// Looks InWord up in Dictionary.
    /// NOTE(review): the result for a missing word is not visible here -- confirm.
    std::string DictLookup(const std::string& InWord);

    std::string PhnLanguage;
};


// Less-than comparison between two StrStr entries; only declared here, the
// definition (and therefore the ordering criterion) lives outside this header.
// Note the parameter names: the FIRST (left-hand) operand is called `right`
// and the SECOND (right-hand) operand is called `left`.
bool operator<(const StrStr& right,const StrStr& left);
#endif // PHONEMIZER_H