import re


def can_form_string(x, symbol_dict):
    """Try to split ``x`` into a sequence of keys taken from ``symbol_dict``.

    The search is a depth-first backtracking match: at each position the keys
    are tried in the dictionary's iteration order, and the first complete
    segmentation found is returned.

    Args:
        x: The string to segment.
        symbol_dict: Mapping whose keys are the allowed segments. Only the
            keys are used here; the values are ignored.

    Returns:
        A ``(success, parts)`` tuple. On success ``parts`` is the list of keys
        that concatenate to ``x`` (an empty list for an empty ``x``). On
        failure the tuple is ``(False, [])``.
    """
    # An empty key matches everywhere without consuming any input, which
    # would make the search recurse forever; such keys can never contribute
    # to a segmentation, so they are dropped up front.
    keys = [key for key in symbol_dict if key]
    total = len(x)
    # Offsets from which the remaining suffix is known to be unsegmentable.
    # Remembering them keeps the backtracking from re-exploring the same
    # suffix over and over (exponential blow-up on overlapping keys) while
    # leaving the first-found result unchanged.
    dead_ends = set()

    def helper(start, matched_parts):
        if start == total:
            return True, matched_parts
        if start in dead_ends:
            return False, []
        for key in keys:
            if x.startswith(key, start):
                result, parts = helper(start + len(key), matched_parts + [key])
                if result:
                    return True, parts
        dead_ends.add(start)
        return False, []

    return helper(0, [])


def text_to_ipa(text, lang_tag, g2p, ignore_comma=True):
    """Convert ``text`` to an IPA string using a grapheme-to-phoneme table.

    The text is lower-cased, the characters ``.``, ``?`` and ``!`` are
    removed, straight apostrophes are replaced with ``’``, and the result is
    split on whitespace. Each word is segmented into keys of the table for
    ``lang_tag`` (plus ``","``) and the corresponding values are concatenated.
    Words are joined with single spaces and a fixed set of symbol
    normalisations is applied to the final string.

    Args:
        text: Input text.
        lang_tag: Key selecting the per-language table inside ``g2p``.
        g2p: Mapping such that ``g2p[lang_tag]`` maps grapheme strings to
            IPA strings.
        ignore_comma: If true, commas contribute nothing to the output;
            otherwise they are emitted as ``","``. This setting overrides any
            ``","`` entry already present in ``g2p[lang_tag]``.

    Returns:
        The IPA transcription, or ``""`` if the text has no words or if any
        word cannot be segmented with the available graphemes.
    """
    text = text.lower()
    text = re.sub(r"[.?!]", "", text)
    text = text.replace("'", "’")
    words = text.split()
    # TODO: change in future (note carried over from the original code)
    print(words)

    if not words:
        # Nothing to transcribe; return before touching g2p[lang_tag], just
        # as the per-word loop would never have looked it up.
        return ""

    # The comma entry is added so punctuation attached to a word can be
    # segmented; the same table must be used for the lookup below, otherwise
    # a matched "," is missing from (or has a different value in) the table.
    extended_g2p = {**g2p[lang_tag], ",": "" if ignore_comma else ","}

    ipa = []
    for word in words:
        result, matched_parts = can_form_string(word, extended_g2p)
        if not result:
            # Diagnostic output: the word that could not be segmented.
            print(word)
            return ""
        ipa.append("".join(extended_g2p[part] for part in matched_parts))

    # Normalise symbols in the joined transcription.
    return (
        " ".join(ipa)
        .replace("g", "ɡ")
        .replace("ʦ", "t͡s")
        .replace("ʨ", "t͡ɕ")
        .replace("R", "ʀ")
        .replace("ʤ", "dʒ")
    )