import re
from indic_transliteration import sanscript


# Ordered list of (compiled IAST pattern, IPA replacement) rewrite rules.
#
# The rules are applied one after another to the whole string, so each rule
# sees the output of the previous ones.  Ordering is therefore significant:
#   * a stop is first rewritten to its '⁼'-marked form (e.g. 'k' -> 'k⁼') and
#     the immediately following rule folds a trailing 'h' into the 'ʰ'-marked
#     form (e.g. 'k⁼h' -> 'kʰ');
#   * the plain dental 't' / 'd' rules must run BEFORE the retroflex rules,
#     because the retroflex replacements ('t`⁼', 'd`⁼') themselves contain a
#     plain 't' / 'd' that a later dental rule would rewrite a second time
#     (yielding 't⁼`⁼' / 't⁼`ʰ' instead of 't`⁼' / 't`ʰ');
#   * 'j' -> 'ʥ⁼' must run before 'y' -> 'j', and the bare 'h' -> 'ɦ' rule
#     must run after every '...h' aspirate rule.
_iast_to_ipa = [(re.compile(iast), ipa) for iast, ipa in [
    # Vowels.
    ('a', 'ə'),
    ('ā', 'aː'),
    ('ī', 'iː'),
    ('ū', 'uː'),
    ('ṛ', 'ɹ`'),
    ('ṝ', 'ɹ`ː'),
    ('ḷ', 'l`'),
    ('ḹ', 'l`ː'),
    ('e', 'eː'),
    ('o', 'oː'),
    # Velars.
    ('k', 'k⁼'),
    ('k⁼h', 'kʰ'),
    ('g', 'g⁼'),
    ('g⁼h', 'gʰ'),
    ('ṅ', 'ŋ'),
    # Palatals.
    ('c', 'ʧ⁼'),
    ('ʧ⁼h', 'ʧʰ'),
    ('j', 'ʥ⁼'),
    ('ʥ⁼h', 'ʥʰ'),
    ('ñ', 'n^'),
    # Dentals -- deliberately placed before the retroflexes (see above).
    ('t', 't⁼'),
    ('t⁼h', 'tʰ'),
    ('d', 'd⁼'),
    ('d⁼h', 'dʰ'),
    # Retroflexes.
    ('ṭ', 't`⁼'),
    ('t`⁼h', 't`ʰ'),
    ('ḍ', 'd`⁼'),
    ('d`⁼h', 'd`ʰ'),
    ('ṇ', 'n`'),
    # Labials.
    ('p', 'p⁼'),
    ('p⁼h', 'pʰ'),
    ('b', 'b⁼'),
    ('b⁼h', 'bʰ'),
    # Remaining consonants and signs.
    ('y', 'j'),
    ('ś', 'ʃ'),
    ('ṣ', 's`'),
    ('r', 'ɾ'),
    ('l̤', 'l`'),
    ('h', 'ɦ'),
    ("'", ''),
    ('~', '^'),
    ('ṃ', '^'),
]]


def _expand_visarga(match):
    """Rewrite one '<char><marks>ḥ' match as '<char><marks>h<char>*'.

    The trailing 'ḥ' is dropped, then 'h', the captured leading character
    (without its '`' / 'ː' marks) and a literal '*' are appended.
    """
    return match.group(0)[:-1] + 'h' + match.group(1) + '*'


def devanagari_to_ipa(text: str) -> str:
    """Convert Devanagari text to the IPA-style notation of ``_iast_to_ipa``.

    Steps, in order:
      1. 'ॐ' is spelled out as 'ओम्'.
      2. Punctuation is normalised: a danda ('।') at the very end of the
         string becomes '.', any other danda becomes ', ', and a double
         danda ('॥') becomes '.'; whitespace around them is absorbed.
      3. The text is transliterated from Devanagari to IAST with
         ``sanscript.transliterate``.
      4. Every rule in ``_iast_to_ipa`` is applied, in table order.
      5. Each remaining 'ḥ' is expanded by ``_expand_visarga``.

    Args:
        text: Input string containing Devanagari text.

    Returns:
        The converted string.
    """
    text = text.replace('ॐ', 'ओम्')

    # The end-of-string danda must be handled before the generic danda rule,
    # otherwise it would be turned into ', ' instead of '.'.
    text = re.sub(r'\s*।\s*$', '.', text)
    text = re.sub(r'\s*।\s*', ', ', text)
    text = re.sub(r'\s*॥', '.', text)

    text = sanscript.transliterate(text, sanscript.DEVANAGARI, sanscript.IAST)

    for pattern, replacement in _iast_to_ipa:
        text = pattern.sub(replacement, text)

    # Runs after the table so the captured character is already in its
    # converted form; the inserted 'h' is intentionally left as a plain 'h'
    # (the 'h' -> 'ɦ' rule has already been applied by this point).
    text = re.sub(r'(.)[`ː]*ḥ', _expand_visarga, text)
    return text