# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re
from string import punctuation

try:
    from g2p_en import G2p
except ImportError:
    # Lexicon-only use does not need the G2P model; the error is raised by
    # _get_g2p() the first time an out-of-lexicon word must be phonemized.
    G2p = None

# Splits text on single delimiter characters and keeps them as tokens
# (capturing group). NOTE: inside the character class "+" is a literal plus
# sign, not a quantifier, so "+" is a delimiter too.
_TOKEN_SPLIT_RE = re.compile(r"([,;.\-\?\!\s+])")

# Matches a braced entry that is empty or a single character that is neither
# a word character nor whitespace.
_PUNCT_PHONE_RE = re.compile(r"\{[^\w\s]?\}")

# Shared G2p instance, created on first use by _get_g2p().
_g2p = None


def _get_g2p():
    """Return the shared ``G2p`` instance, constructing it on first use."""
    global _g2p
    if _g2p is None:
        if G2p is None:
            raise ImportError(
                "g2p_en is required to phonemize words missing from the lexicon"
            )
        _g2p = G2p()
    return _g2p


def read_lexicon(lex_path):
    """Load a pronunciation lexicon from a whitespace-separated text file.

    Each non-blank line is read as ``WORD PHONE PHONE ...``. Words are
    lower-cased; when a word appears more than once, the first entry wins.

    Args:
        lex_path: Path of the lexicon file (read as UTF-8).

    Returns:
        dict mapping each lower-cased word to its list of phone strings.
    """
    lexicon = {}
    with open(lex_path, encoding="utf-8") as f:
        for line in f:
            # str.split() ignores leading/trailing whitespace, so a trailing
            # space never yields an empty phone and blank lines yield nothing.
            fields = line.split()
            if not fields:
                continue
            lexicon.setdefault(fields[0].lower(), fields[1:])
    return lexicon


def preprocess_english(text, lexicon):
    """Convert English text to a space-separated phone string.

    Trailing punctuation is stripped from ``text``, which is then split into
    word and delimiter tokens. Tokens found in ``lexicon`` (case-insensitive)
    use the lexicon phones; other non-blank tokens are phonemized with
    ``g2p_en``, dropping its " " separators.

    Args:
        text: Input sentence.
        lexicon: Mapping from lower-cased word to a list of phones, e.g. the
            result of ``read_lexicon``.

    Returns:
        The phones joined by single spaces. An entry that is empty or a
        single punctuation character is replaced by ``sp`` when it lies
        between two other entries.

    Raises:
        ImportError: If an out-of-lexicon token is met and ``g2p_en`` is not
            installed.
    """
    text = text.rstrip(punctuation)

    phones = []
    for token in _TOKEN_SPLIT_RE.split(text):
        key = token.lower()
        if key in lexicon:
            phones += lexicon[key]
        elif token.strip():
            # Blank tokens (whitespace delimiters and the empty strings that
            # re.split emits between adjacent delimiters) are skipped; g2p_en
            # is expected to return no phonemes for them anyway.
            phones += [p for p in _get_g2p()(token) if p != " "]

    # Join with "}{" so interior entries look like "{x}"; the first and last
    # entries have no outer brace and are therefore never rewritten to "sp".
    joined = "}{".join(phones)
    joined = _PUNCT_PHONE_RE.sub("{sp}", joined)
    return joined.replace("}{", " ")