# micahg's picture
# Initial file upload
import epitran
class DictFirst:
    """If words are in a dictionary, use one model; if not, use a fallback.

    Every word listed in the dictionary file is transliterated once, at
    construction time, with the model built from ``code1``. Tokens that are
    not in the dictionary are transliterated on demand with the model built
    from ``code2``.

    Args:
        code1 (str): language-script code for dictionary language
        code2 (str): language-script code for fall-back language
        dict_fn (str): file path to text file containing dictionary, one
            word per line
    """

    def __init__(self, code1: str, code2: str, dict_fn: str) -> None:
        self.epi1 = epitran.Epitran(code1)
        self.epi2 = epitran.Epitran(code2)
        # Must come after self.epi1 is set: _read_dictionary uses it.
        self.dictionary = self._read_dictionary(dict_fn)

    def _read_dictionary(self, dict_fn: str) -> dict:
        """Load the dictionary file and pre-transliterate every word.

        Args:
            dict_fn (str): file path to a UTF-8 text file, one word per line

        Returns:
            dict: maps each word (surrounding whitespace stripped) to the
                result of transliterating it with the dictionary-language
                model

        Raises:
            OSError: if the file cannot be opened
        """
        entries = {}
        with open(dict_fn, encoding='utf-8') as f:
            for line in f:
                word = line.strip()
                # Blank lines are not words; skipping them keeps the empty
                # string out of the dictionary.
                if not word:
                    continue
                entries[word] = self.epi1.transliterate(word)
        return entries

    def transliterate(self, token: str) -> str:
        """Convert token to IPA, falling back on second language

        Args:
            token (str): token to convert to IPA

        Returns:
            str: IPA equivalent of token
        """
        if token in self.dictionary:
            return self.dictionary[token]
        return self.epi2.transliterate(token)