import epitran

class DictFirst:
    """If words are in a dictionary, use one model; if words are not, use another fallback.
     
    Args:
        code1 (str): language-script code for dictionary language
        code2 (str): language-script code for fall-back language
        dict_fn (str): file path to text file containing dictionary, one word per line
    """
    def __init__(self, code1, code2, dict_fn):
        self.epi1 = epitran.Epitran(code1)
        self.epi2 = epitran.Epitran(code2)
        self.dictionary = self._read_dictionary(dict_fn)

    def _read_dictionary(self, dict_fn):
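        # Map each dictionary word to its IPA transliteration in the first
        # (dictionary) language; surrounding whitespace is stripped.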
        with open(dict_fn, encoding='utf-8') as f:
            return {x.strip(): self.epi1.transliterate(x.strip()) for x in f}

    def transliterate(self, token):
        """Convert token to IPA, falling back on second language

        Args:
            token (str): token to covert to IPA

        Returns:
            str: IPA equivalent of token
        """
        if token in self.dictionary:
            return self.dictionary[token]
        else:
            return self.epi2.transliterate(token)
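

# A minimal usage sketch, not part of the class above: the language codes
# ('tur-Latn' for Turkish, 'aze-Latn' for Azerbaijani) and the dictionary
# path 'turkish_words.txt' are illustrative assumptions, not fixtures shipped
# with this file. The dictionary file is expected to hold one word per line.
if __name__ == '__main__':
    df = DictFirst('tur-Latn', 'aze-Latn', 'turkish_words.txt')
    for token in ['merhaba', 'salam']:
        # Dictionary words use the Turkish model; everything else falls
        # back to Azerbaijani grapheme-to-phoneme rules.
        print(token, '->', df.transliterate(token))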