"""Convert Rohingya text between scripts, using Epitran's IPA as the pivot."""

import epitran


# Consonant rewrites for the rhg-lroh output.
# 'j' -> 'y' must run before 'd͡ʒ' -> 'j'; otherwise the freshly written 'j'
# would itself be turned into 'y'.
_LROH_CONSONANT_RULES = (
    ('ɖ', 'ḍ'),
    ('ɾ', 'r'),
    ('ɽ', 'ṛ'),
    ('ʃ', 'š'),
    ('ʈ', 'ṭ'),
    ('j', 'y'),
    ('d͡ʒ', 'j'),
)

# Consonant rewrites for the rhg-roheng output (same ordering constraint).
_ROHENG_CONSONANT_RULES = (
    ('ɖ', 'dh'),
    ('ɾ', 'r'),
    ('ɽ', 'ç'),
    ('ʃ', 'c'),
    ('ʈ', 'th'),
    ('j', 'y'),
    ('d͡ʒ', 'j'),
)

# Long vowels: drop the IPA length mark and write the vowel twice.
# These patterns are spelled with the IPA vowel symbols, so they have to run
# before _VOWEL_QUALITY_RULES rewrites 'ɑ' and 'ɔ'.
_LONG_VOWEL_RULES = (
    ('ɑ̃ː', 'ɑ̃ɑ'),
    ('ɑː', 'ɑɑ'),
    ('ẽː', 'eẽ'),
    ('eː', 'ee'),
    ('ĩː', 'iĩ'),
    ('iː', 'ii'),
    ('ɔ̃ː', 'ɔ̃ɔ'),
    ('ɔː', 'ɔɔ'),
    ('ũː', 'uũ'),
    ('uː', 'uu'),
)

# Replace the remaining IPA vowel symbols with plain Latin letters.
# The nasal 'ɔ̃' is handled before the bare 'ɔ' so the whole sequence is
# replaced in one step.
_VOWEL_QUALITY_RULES = (
    ('ɑ', 'a'),
    ('ɔ̃', 'õ'),
    ('ɔ', 'o'),
)


def _apply_rules(s, rules):
    """Apply each ``(old, new)`` replacement in *rules* to *s*, in order."""
    for old, new in rules:
        s = s.replace(old, new)
    return s


def to_lroh(s: str) -> str:
    """Rewrite an IPA string into the rhg-lroh Latin spelling.

    Args:
        s: IPA text (``convert_script`` passes Epitran's output here).

    Returns:
        The text with consonants, long vowels and vowel qualities rewritten
        according to the rhg-lroh rule tables.
    """
    s = _apply_rules(s, _LROH_CONSONANT_RULES)
    s = _apply_rules(s, _LONG_VOWEL_RULES)
    return _apply_rules(s, _VOWEL_QUALITY_RULES)


def to_roheng(s: str) -> str:
    """Rewrite an IPA string into the rhg-roheng Latin spelling.

    Args:
        s: IPA text (``convert_script`` passes Epitran's output here).

    Returns:
        The text with consonants, long vowels and vowel qualities rewritten
        according to the rhg-roheng rule tables.
    """
    s = _apply_rules(s, _ROHENG_CONSONANT_RULES)
    # The vowel-quality pass must come last. It used to run before the
    # long-vowel rules as well, which turned every 'ɑ' into 'a' too early:
    # 'ɑː' / 'ɑ̃ː' then never matched and the length mark 'ː' leaked into
    # the output.
    s = _apply_rules(s, _LONG_VOWEL_RULES)
    return _apply_rules(s, _VOWEL_QUALITY_RULES)


# Supported values of ``output_script`` and the converter each one selects.
_OUTPUT_CONVERTERS = {
    'rhg-roheng': to_roheng,
    'rhg-lroh': to_lroh,
}


def convert_script(input_script: str, output_script: str, input_text: str) -> str:
    """Convert *input_text* from one script to another.

    The text is transliterated with Epitran and the result is then rewritten
    into the requested output spelling.

    Args:
        input_script: Language/script code handed to ``epitran.Epitran``.
        output_script: Target spelling, ``'rhg-roheng'`` or ``'rhg-lroh'``.
        input_text: Text to convert.

    Returns:
        The converted text, with words joined by single spaces and the
        original word-initial capitalization reapplied by word position.

    Raises:
        ValueError: If *output_script* is not a supported target.
    """
    # Fail fast with a clear message; previously an unknown target surfaced
    # as an UnboundLocalError after the transliteration had already run.
    convert = _OUTPUT_CONVERTERS.get(output_script)
    if convert is None:
        supported = ', '.join(sorted(_OUTPUT_CONVERTERS))
        raise ValueError(
            f'Unsupported output script {output_script!r}; expected one of: {supported}'
        )

    epi = epitran.Epitran(input_script)

    # store indices for capitalized words (will assume only first letter is capitalized)
    capital_indices = [
        i for i, word in enumerate(input_text.split()) if word[0].isupper()
    ]

    inter_text = convert(epi.transliterate(input_text))

    # reapply capitalization; the bounds check covers the case where the
    # converted text ends up with fewer words than the input
    words = inter_text.split()
    for i in capital_indices:
        if i < len(words):
            words[i] = words[i].capitalize()

    return ' '.join(words)


# print (f'Number of script mismatches: {numScriptMismatch} / {numEntries}')

# issues
#
# ou
# glides with only one vowel nasalized (i.e. is the whole glide always
# nasalized?) (e.g. thiañ/ṭĩya) - need a consistent way to deal with glides
# and nasalization (i.e. which vowel is nasalized?)