|
import re |
|
from indic_transliteration import sanscript |
|
|
|
|
|
|
|
# Ordered (compiled pattern, replacement) rules that rewrite IAST text into the
# IPA-style notation used by this module.
#
# Order matters: the rules are applied one after another to the whole string,
# so a later pattern may deliberately match the output of an earlier rule
# (e.g. 'k' -> 'k⁼' followed by 'k⁼h' -> 'kʰ').
_iast_to_ipa = [(re.compile(pattern), replacement) for pattern, replacement in [
    ('a', 'ə'),
    ('ā', 'aː'),
    ('ī', 'iː'),
    ('ū', 'uː'),
    ('ṛ', 'ɹ`'),
    ('ṝ', 'ɹ`ː'),
    ('ḷ', 'l`'),
    ('ḹ', 'l`ː'),
    ('e', 'eː'),
    ('o', 'oː'),
    ('k', 'k⁼'),
    ('k⁼h', 'kʰ'),
    ('g', 'g⁼'),
    ('g⁼h', 'gʰ'),
    ('ṅ', 'ŋ'),
    ('c', 'ʧ⁼'),
    ('ʧ⁼h', 'ʧʰ'),
    ('j', 'ʥ⁼'),
    ('ʥ⁼h', 'ʥʰ'),
    ('ñ', 'n^'),
    ('ṭ', 't`⁼'),
    ('t`⁼h', 't`ʰ'),
    ('ḍ', 'd`⁼'),
    ('d`⁼h', 'd`ʰ'),
    ('ṇ', 'n`'),
    # The plain 't'/'d' rules run after the 'ṭ'/'ḍ' rules above have already
    # emitted 't`…'/'d`…'. The negative lookahead keeps them from matching
    # that output again, which would otherwise yield 't⁼`⁼' or 't⁼`ʰ'.
    ('t(?!`)', 't⁼'),
    ('t⁼h', 'tʰ'),
    ('d(?!`)', 'd⁼'),
    ('d⁼h', 'dʰ'),
    ('p', 'p⁼'),
    ('p⁼h', 'pʰ'),
    ('b', 'b⁼'),
    ('b⁼h', 'bʰ'),
    # 'y' -> 'j' must stay after the 'j' rule so the new 'j' is not rewritten.
    ('y', 'j'),
    ('ś', 'ʃ'),
    ('ṣ', 's`'),
    ('r', 'ɾ'),
    ('l̤', 'l`'),
    # Any 'h' still present here was not consumed by an aspirate rule above.
    ('h', 'ɦ'),
    ("'", ''),
    ('~', '^'),
    ('ṃ', '^'),
]]
|
|
|
|
|
def devanagari_to_ipa(text):
    """Convert Devanagari text to the IPA-style notation of ``_iast_to_ipa``.

    Steps, in order:

    1. Replace the 'ॐ' sign with the spelled-out 'ओम्'.
    2. Rewrite punctuation: a '।' at the very end of the string becomes '.',
       any other '।' becomes ', ', and '॥' becomes '.' (surrounding
       whitespace is absorbed as the patterns specify).
    3. Transliterate Devanagari to IAST with ``sanscript.transliterate``.
    4. Apply every ``_iast_to_ipa`` rule in table order.
    5. Rewrite each 'ḥ': it is replaced by 'h', followed by a copy of the
       character captured before it (skipping any '`'/'ː' marks in between),
       followed by '*'.

    Args:
        text: Input string in Devanagari script.

    Returns:
        The converted string.
    """
    text = text.replace('ॐ', 'ओम्')

    # Order matters: the end-of-string danda rule must run before the
    # general danda rule, otherwise a final '।' would become ', '.
    punctuation_rules = (
        (r'\s*।\s*$', '.'),
        (r'\s*।\s*', ', '),
        (r'\s*॥', '.'),
    )
    for pattern, mark in punctuation_rules:
        text = re.sub(pattern, mark, text)

    text = sanscript.transliterate(text, sanscript.DEVANAGARI, sanscript.IAST)

    for rule, ipa in _iast_to_ipa:
        text = re.sub(rule, ipa, text)

    def _expand_visarga(match):
        # Drop the trailing 'ḥ' from the match, then append 'h', the captured
        # leading character and the '*' marker.
        kept = match.group(0)[:-1]
        return kept + 'h' + match.group(1) + '*'

    return re.sub('(.)[`ː]*ḥ', _expand_visarga, text)
|
|