# mmstts / mm_num2word.py
# p
# Text to speech for 1000+ languages
# c9574d9
"""
This file is adapted from https://github.com/hpbyte/Myanmar_Number_to_Words
"""
import re
# Myanmar digit -> spoken word.
# NOTE(review): several words end in an ASCII ":" where standard orthography
# would presumably use the visarga sign (U+1038).  The table is kept exactly
# as found -- confirm against the downstream text pipeline before changing it.
mm_digit = {
    "၀": "သုည",
    "၁": "တစ်",
    "၂": "နှစ်",
    "၃": "သုံ:",
    "၄": "လေ:",
    "၅": "ငါ:",
    "၆": "ခြောက်",
    "၇": "ခုနှစ်",
    "၈": "ရှစ်",
    "၉": "ကို:",
}

# regular expressions (raw strings, so "\/" and "\." reach the regex engine
# unchanged without triggering invalid-escape warnings)
rgxPh = r"^(၀၁|၀၉)"
rgxDate = r"[၀-၉]{1,2}-[၀-၉]{1,2}-[၀-၉]{4}|[၀-၉]{1,2}\/[၀-၉]{1,2}\/[၀-၉]{4}"
rgxTime = r"[၀-၉]{1,2}:[၀-၉]{1,2}"
rgxDec = r"[၀-၉]*\.[၀-၉]*"
rgxAmt = r"[,၀-၉]+"

# Private helpers derived from the constants above.
_rgxDigitsOnly = r"[၀-၉]+"
_NONZERO_DIGITS = frozenset(mm_digit) - {"၀"}
_NUM_PATTERN = re.compile("%s|%s|%s|%s" % (rgxDate, rgxTime, rgxDec, rgxAmt))


def _spell_places(num):
    """
    Spell out every non-zero decimal place of a Myanmar digit string.

    Places whose digit is zero contribute nothing, so an all-zero (or empty)
    string yields an empty result; convert_digit() adds the spoken zero.

    @type   num str
    @param  num string of Myanmar digits
    @rtype  str
    @return spoken words for the non-zero places, possibly empty
    """
    nb_digits = len(num)

    def is_nonzero(pos):
        # pos counts places from the right, starting at 1 for the units.
        return num[-pos] != "၀"

    parts = []

    if nb_digits > 5:
        # Everything above the ten-thousands place is counted in lakhs.
        # Recurse on the digit speller directly so the prefix can never be
        # mistaken for a phone number, date, etc.
        lakhs = num[:-5]
        spoken = _spell_places(lakhs)
        # The unit word goes first when the lakh count ends in "၀၀",
        # otherwise it follows the count.
        parts.append(
            ("သိန်: " + spoken) if lakhs[-2:] == "၀၀" else (spoken + "သိန်: ")
        )

    if nb_digits > 4 and is_nonzero(5):
        parts.append(mm_digit[num[-5]] + "သောင်: ")

    if nb_digits > 3 and is_nonzero(4):
        # Plain form for a round thousand, otherwise the form with the
        # trailing dot-below sign, used when more digits follow.
        parts.append(
            mm_digit[num[-4]] + ("ထောင် " if num[-3:] == "၀၀၀" else "ထောင့် ")
        )

    if nb_digits > 2 and is_nonzero(3):
        tens, units = num[-2], num[-1]
        # The dot-below form is used when exactly one of the two lower
        # places is a non-zero digit.
        exactly_one_lower = (tens == "၀" and units in _NONZERO_DIGITS) or (
            tens in _NONZERO_DIGITS and units == "၀"
        )
        parts.append(mm_digit[num[-3]] + ("ရာ့ " if exactly_one_lower else "ရာ "))

    if nb_digits > 1 and is_nonzero(2):
        # A leading "one" is not spoken in front of "ten".
        head = "" if num[-2] == "၁" else mm_digit[num[-2]]
        parts.append(head + ("ဆယ် " if num[-1] == "၀" else "ဆယ့် "))

    if nb_digits > 0 and is_nonzero(1):
        parts.append(mm_digit[num[-1]])

    return "".join(parts)


def convert_digit(num):
    """
    Convert a plain Myanmar digit string into spoken words.

    A non-empty string consisting only of zeros is read as the word for
    zero; an empty string yields an empty string.

    @type   num str
    @param  num Myanmar number (digits only, no separators)
    @rtype  str
    @return converted Myanmar spoken words
    @raise  KeyError if a spoken position holds a non-digit character
    """
    converted = _spell_places(num)
    if not converted and num and all(d == "၀" for d in num):
        # Without this, "၀" (and the integer part of "၀.၅") would be silent.
        return mm_digit["၀"]
    return converted


def mm_num2word(num):
    """
    Detect type of number and convert accordingly

    Recognised forms, tried in this order: phone number, date
    (day-month-year or day/month/year), time, decimal, amount.

    @type   num str
    @param  num Myanmar number
    @rtype  str
    @return converted Myanmar spoken words
    @raise  ValueError if num matches none of the supported formats
    """
    # phone number: starts with ၀၁ / ၀၉ and is digits only.  The digits-only
    # requirement keeps dates and times such as "၀၉:၃၀" out of this branch,
    # where their separators would raise KeyError.
    if re.match(rgxPh, num) and re.fullmatch(_rgxDigitsOnly, num):
        # Read digit by digit; seven uses a dedicated form here.
        return " ".join("ခွန်" if d == "၇" else mm_digit[d] for d in num)

    # date: spoken as year, month, day
    if re.match(rgxDate, num):
        fields = re.split(r"-|/", num)
        return (
            convert_digit(fields[-1])
            + " ခုနှစ် "
            + convert_digit(fields[1])
            + " လပိုင်: "
            + convert_digit(fields[0])
            + " ရက်"
        )

    # time: thirty minutes has its own word
    if re.match(rgxTime, num):
        fields = re.split(r":", num)
        minutes = "ခွဲ" if fields[1] == "၃၀" else (convert_digit(fields[1]) + " မိနစ်")
        return convert_digit(fields[0]) + " နာရီ " + minutes

    # decimal: integer part as a number, fraction digit by digit
    if re.match(rgxDec, num):
        fields = re.split(r"\.", num)
        return (
            convert_digit(fields[0])
            + " ဒဿမ "
            + " ".join(mm_digit[d] for d in fields[1])
        )

    # amount: thousands separators are dropped before conversion
    if re.match(rgxAmt, num):
        return convert_digit(num.replace(",", ""))

    # default
    raise ValueError("Cannot convert the provided number format!")


def extract_num(S):
    """
    Extract numbers from the input string

    Candidates are found with the date, time, decimal and amount patterns
    (in that priority).  Matches that contain no Myanmar digit, such as a
    lone "." or ",", are discarded because they are punctuation, not numbers.

    @type   S str
    @param  S Myanmar sentence
    @rtype  list
    @return a list of Myanmar numbers
    """
    return [
        candidate
        for candidate in _NUM_PATTERN.findall(S)
        if any(ch in mm_digit for ch in candidate)
    ]