import re
import string

# ----------------------------- indic_num.py -----------------------------

# Two-letter codes of the languages that have a number-word table below.
supported_lang = {"en", "hi", "gu", "mr", "bn", "te", "ta", "kn", "or", "pa"}
# Three-letter equivalents, kept for reference only:
# {'eng', 'hin', 'guj', 'mar', 'ben', 'tel', 'tam', 'kan', 'ori', 'pan'}

# Native decimal digits per language; index i holds the glyph for value i.
all_num = {
    "en": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
    "hi": ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"],
    "gu": ["૦", "૧", "૨", "૩", "૪", "૫", "૬", "૭", "૮", "૯"],
    "mr": ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"],
    "bn": ["০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯"],
    "te": ["౦", "౧", "౨", "౩", "౪", "౫", "౬", "౭", "౮", "౯"],
    # Index 10 is the Tamil sign for ten; it is not a positional digit and is
    # ignored when digits are normalised.
    "ta": ["௦", "௧", "௨", "௩", "௪", "௫", "௬", "௭", "௮", "௯", "௰"],
    "kn": ["೦", "೧", "೨", "೩", "೪", "೫", "೬", "೭", "೮", "೯"],
    "or": ["୦", "୧", "୨", "୩", "୪", "୫", "୬", "୭", "୮", "୯"],
    "pa": ["੦", "੧", "੨", "੩", "੪", "੫", "੬", "੭", "੮", "੯"],
}

# Translation table mapping every native digit glyph to its ASCII digit.
# Only the first ten entries of each list are positional digits.
_NATIVE_TO_ASCII_DIGIT = str.maketrans(
    {
        native: str(value)
        for digits in all_num.values()
        for value, native in enumerate(digits[:10])
    }
)

# Number words per language, keyed by the decimal string of the value.
# Every table spells out 0-99 and then the place-value names that
# num_to_word looks up (100, 1000, 100000, 10000000).
num_dict = {}

num_dict["en"] = {
    "0": "zero", "1": "one", "2": "two", "3": "three", "4": "four",
    "5": "five", "6": "six", "7": "seven", "8": "eight", "9": "nine",
    "10": "ten", "11": "eleven", "12": "twelve", "13": "thirteen", "14": "fourteen",
    "15": "fifteen", "16": "sixteen", "17": "seventeen", "18": "eighteen", "19": "nineteen",
    "20": "twenty", "21": "twenty-one", "22": "twenty-two", "23": "twenty-three", "24": "twenty-four",
    "25": "twenty-five", "26": "twenty-six", "27": "twenty-seven", "28": "twenty-eight", "29": "twenty-nine",
    "30": "thirty", "31": "thirty-one", "32": "thirty-two", "33": "thirty-three", "34": "thirty-four",
    "35": "thirty-five", "36": "thirty-six", "37": "thirty-seven", "38": "thirty-eight", "39": "thirty-nine",
    "40": "forty", "41": "forty-one", "42": "forty-two", "43": "forty-three", "44": "forty-four",
    "45": "forty-five", "46": "forty-six", "47": "forty-seven", "48": "forty-eight", "49": "forty-nine",
    "50": "fifty", "51": "fifty-one", "52": "fifty-two", "53": "fifty-three", "54": "fifty-four",
    "55": "fifty-five", "56": "fifty-six", "57": "fifty-seven", "58": "fifty-eight", "59": "fifty-nine",
    "60": "sixty", "61": "sixty-one", "62": "sixty-two", "63": "sixty-three", "64": "sixty-four",
    "65": "sixty-five", "66": "sixty-six", "67": "sixty-seven", "68": "sixty-eight", "69": "sixty-nine",
    "70": "seventy", "71": "seventy-one", "72": "seventy-two", "73": "seventy-three", "74": "seventy-four",
    "75": "seventy-five", "76": "seventy-six", "77": "seventy-seven", "78": "seventy-eight", "79": "seventy-nine",
    "80": "eighty", "81": "eighty-one", "82": "eighty-two", "83": "eighty-three", "84": "eighty-four",
    "85": "eighty-five", "86": "eighty-six", "87": "eighty-seven", "88": "eighty-eight", "89": "eighty-nine",
    "90": "ninety", "91": "ninety-one", "92": "ninety-two", "93": "ninety-three", "94": "ninety-four",
    "95": "ninety-five", "96": "ninety-six", "97": "ninety-seven", "98": "ninety-eight", "99": "ninety-nine",
    "100": "hundred",
    "1000": "thousand",
    "100000": "lac",
    "10000000": "crore",
    "1000000000": "arab",
}  # English-India

num_dict["hi"] = {
    "0": "शून्य", "1": "एक", "2": "दो", "3": "तीन", "4": "चार",
    "5": "पाँच", "6": "छः", "7": "सात", "8": "आठ", "9": "नौ",
    "10": "दस", "11": "ग्यारह", "12": "बारह", "13": "तेरह", "14": "चौदह",
    "15": "पंद्रह", "16": "सोलह", "17": "सत्रह", "18": "अट्ठारह", "19": "उन्नीस",
    "20": "बीस", "21": "इक्कीस", "22": "बाईस", "23": "तेईस", "24": "चौबिस",
    "25": "पच्चीस", "26": "छब्बीस", "27": "सत्ताईस", "28": "अट्ठाईस", "29": "उनतीस",
    "30": "तीस", "31": "इकतीस", "32": "बत्तीस", "33": "तैंतीस", "34": "चौंतीस",
    "35": "पैंतीस", "36": "छत्तीस", "37": "सैंतीस", "38": "अड़तीस", "39": "उनतालीस",
    "40": "चालीस", "41": "इकतालीस", "42": "बयालीस", "43": "तैंतालीस", "44": "चौंतालीस",
    "45": "पैंतालीस", "46": "छियालीस", "47": "सैंतालीस", "48": "अड़तालीस", "49": "उनचास",
    "50": "पचास", "51": "इक्यावन", "52": "बावन", "53": "तिरेपन", "54": "चौवन",
    "55": "पचपन", "56": "छप्पन", "57": "सत्तावन", "58": "अट्ठावन", "59": "उनसठ",
    "60": "साठ", "61": "इकसठ", "62": "बासठ", "63": "तिरेसठ", "64": "चौंसठ",
    "65": "पैंसठ", "66": "छयासठ", "67": "सरसठ", "68": "अड़सठ", "69": "उनहत्तर",
    "70": "सत्तर", "71": "इकहत्तर", "72": "बहत्तर", "73": "तिहत्तर", "74": "चौहत्तर",
    "75": "पचहत्तर", "76": "छिहत्तर", "77": "सतहत्तर", "78": "अठहत्तर", "79": "उन्यासी",
    "80": "अस्सी", "81": "इक्यासी", "82": "बयासी", "83": "तिरासी", "84": "चौरासी",
    "85": "पचासी", "86": "छियासी", "87": "सत्तासी", "88": "अठासी", "89": "नवासी",
    "90": "नब्बे", "91": "इक्यानवे", "92": "बानवे", "93": "तिरानवे", "94": "चौरानवे",
    "95": "पचानवे", "96": "छियानवे", "97": "सत्तानवे", "98": "अट्ठानवे", "99": "निन्यानवे",
    "100": "सौ",
    "1000": "हज़ार",
    "100000": "लाख",
    "10000000": "करोड़",
    "1000000000": "अरब",
}  # Hindi

num_dict["gu"] = {
    "0": "શૂન્ય", "1": "એક", "2": "બે", "3": "ત્રણ", "4": "ચાર",
    "5": "પાંચ", "6": "છ", "7": "સાત", "8": "આઠ", "9": "નવ",
    "10": "દસ", "11": "અગિયાર", "12": "બાર", "13": "તેર", "14": "ચૌદ",
    "15": "પંદર", "16": "સોળ", "17": "સત્તર", "18": "અઢાર", "19": "ઓગણિસ",
    "20": "વીસ", "21": "એકવીસ", "22": "બાવીસ", "23": "તેવીસ", "24": "ચોવીસ",
    "25": "પચ્ચીસ", "26": "છવીસ", "27": "સત્તાવીસ", "28": "અઠ્ઠાવીસ", "29": "ઓગણત્રીસ",
    "30": "ત્રીસ", "31": "એકત્રીસ", "32": "બત્રીસ", "33": "તેત્રીસ", "34": "ચોત્રીસ",
    "35": "પાંત્રીસ", "36": "છત્રીસ", "37": "સડત્રીસ", "38": "અડત્રીસ", "39": "ઓગણચાલીસ",
    "40": "ચાલીસ", "41": "એકતાલીસ", "42": "બેતાલીસ", "43": "ત્રેતાલીસ", "44": "ચુંમાલીસ",
    "45": "પિસ્તાલીસ", "46": "છેતાલીસ", "47": "સુડતાલીસ", "48": "અડતાલીસ", "49": "ઓગણપચાસ",
    "50": "પચાસ", "51": "એકાવન", "52": "બાવન", "53": "ત્રેપન", "54": "ચોપન",
    "55": "પંચાવન", "56": "છપ્પન", "57": "સત્તાવન", "58": "અઠ્ઠાવન", "59": "ઓગણસાઠ",
    "60": "સાઈઠ", "61": "એકસઠ", "62": "બાસઠ", "63": "ત્રેસઠ", "64": "ચોસઠ",
    "65": "પાંસઠ", "66": "છાસઠ", "67": "સડસઠ", "68": "અડસઠ", "69": "અગણોસિત્તેર",
    "70": "સિત્તેર", "71": "એકોતેર", "72": "બોતેર", "73": "તોતેર", "74": "ચુમોતેર",
    "75": "પંચોતેર", "76": "છોતેર", "77": "સિત્યોતેર", "78": "ઇઠ્યોતેર", "79": "ઓગણાએંસી",
    "80": "એંસી", "81": "એક્યાસી", "82": "બ્યાસી", "83": "ત્યાસી", "84": "ચોર્યાસી",
    "85": "પંચાસી", "86": "છ્યાસી", "87": "સિત્યાસી", "88": "ઈઠ્યાસી", "89": "નેવ્યાસી",
    "90": "નેવું", "91": "એકાણું", "92": "બાણું", "93": "ત્રાણું", "94": "ચોરાણું",
    "95": "પંચાણું", "96": "છન્નું", "97": "સત્તાણું", "98": "અઠ્ઠાણું", "99": "નવ્વાણું",
    "100": "સો",
    "1000": "હજાર",
    "100000": "લાખ",
    "1000000": "દસ લાખ",
    "10000000": "કરોડ઼",
}  # Gujarati

num_dict["mr"] = {
    "0": "शून्य", "1": "एक", "2": "दोन", "3": "तीन", "4": "चार",
    "5": "पाच", "6": "सहा", "7": "सात", "8": "आठ", "9": "नऊ",
    "10": "दहा", "11": "अकरा", "12": "बारा", "13": "तेरा", "14": "चौदा",
    "15": "पंधरा", "16": "सोळा", "17": "सतरा", "18": "अठरा", "19": "एकोणीस",
    "20": "वीस", "21": "एकवीस", "22": "बावीस", "23": "तेवीस", "24": "चोवीस",
    "25": "पंचवीस", "26": "सव्वीस", "27": "सत्तावीस", "28": "अठ्ठावीस", "29": "एकोणतीस",
    "30": "तीस", "31": "एकतीस", "32": "बत्तीस", "33": "तेहेतीस", "34": "चौतीस",
    "35": "पस्तीस", "36": "छत्तीस", "37": "सदतीस", "38": "अडतीस", "39": "एकोणचाळीस",
    "40": "चाळीस", "41": "एक्केचाळीस", "42": "बेचाळीस", "43": "त्रेचाळीस", "44": "चव्वेचाळीस",
    "45": "पंचेचाळीस", "46": "सेहेचाळीस", "47": "सत्तेचाळीस", "48": "अठ्ठेचाळीस", "49": "एकोणपन्नास",
    "50": "पन्नास", "51": "एक्कावन्न", "52": "बावन्न", "53": "त्रेपन्न", "54": "चोपन्न",
    "55": "पंचावन्न", "56": "छप्पन्न", "57": "सत्तावन्न", "58": "अठ्ठावन्न", "59": "एकोणसाठ",
    "60": "साठ", "61": "एकसष्ठ", "62": "बासष्ठ", "63": "त्रेसष्ठ", "64": "चौसष्ठ",
    "65": "पासष्ठ", "66": "सहासष्ठ", "67": "सदुसष्ठ", "68": "अडुसष्ठ", "69": "एकोणसत्तर",
    "70": "सत्तर", "71": "एक्काहत्तर", "72": "बाहत्तर", "73": "त्र्याहत्तर", "74": "चौर्‍याहत्तर",
    "75": "पंच्याहत्तर", "76": "शहात्तर", "77": "सत्याहत्तर", "78": "अठ्ठ्याहत्तर", "79": "एकोण ऐंशी",
    "80": "ऐंशी", "81": "एक्क्याऐंशी", "82": "ब्याऐंशी", "83": "त्र्याऐंशी", "84": "चौऱ्याऐंशी",
    "85": "पंच्याऐंशी", "86": "शहाऐंशी", "87": "सत्त्याऐंशी", "88": "अठ्ठ्याऐंशी", "89": "एकोणनव्वद",
    "90": "नव्वद", "91": "एक्क्याण्णव", "92": "ब्याण्णव", "93": "त्र्याण्णव", "94": "चौऱ्याण्णव",
    "95": "पंच्याण्णव", "96": "शहाण्णव", "97": "सत्त्याण्णव", "98": "अठ्ठ्याण्णव", "99": "नव्व्याण्णव",
    "100": "शे",
    "1000": "हजार",
    "100000": "लाख",
    "10000000": "कोटी",
    "1000000000": "अब्ज",
}  # Marathi

num_dict["bn"] = {
    "0": "শূন্য", "1": "এক", "2": "দুই", "3": "তিন", "4": "চার",
    "5": "পাঁচ", "6": "ছয়", "7": "সাত", "8": "আট", "9": "নয়",
    "10": "দশ", "11": "এগার", "12": "বার", "13": "তের", "14": "চৌদ্দ",
    "15": "পনের", "16": "ষোল", "17": "সতের", "18": "আঠার", "19": "ঊনিশ",
    "20": "বিশ", "21": "একুশ", "22": "বাইশ", "23": "তেইশ", "24": "চব্বিশ",
    "25": "পঁচিশ", "26": "ছাব্বিশ", "27": "সাতাশ", "28": "আঠাশ", "29": "ঊনত্রিশ",
    "30": "ত্রিশ", "31": "একত্রিশ", "32": "বত্রিশ", "33": "তেত্রিশ", "34": "চৌত্রিশ",
    "35": "পঁয়ত্রিশ", "36": "ছত্রিশ", "37": "সাঁইত্রিশ", "38": "আটত্রিশ", "39": "ঊনচল্লিশ",
    "40": "চল্লিশ", "41": "একচল্লিশ", "42": "বিয়াল্লিশ", "43": "তেতাল্লিশ", "44": "চুয়াল্লিশ",
    "45": "পঁয়তাল্লিশ", "46": "ছেচল্লিশ", "47": "সাতচল্লিশ", "48": "আটচল্লিশ", "49": "ঊনপঞ্চাশ",
    "50": "পঞ্চাশ", "51": "একান্ন", "52": "বায়ান্ন", "53": "তিপ্পান্ন", "54": "চুয়ান্ন",
    "55": "পঞ্চান্ন", "56": "ছাপ্পান্ন", "57": "সাতান্ন", "58": "আটান্ন", "59": "ঊনষাট",
    "60": "ষাট", "61": "একষট্টি", "62": "বাষট্টি", "63": "তেষট্টি", "64": "চৌষট্টি",
    "65": "পঁয়ষট্টি", "66": "ছেষট্টি", "67": "সাতষট্টি", "68": "আটষট্টি", "69": "ঊনসত্তর",
    "70": "সত্তর", "71": "একাত্তর", "72": "বাহাত্তর", "73": "তিয়াত্তর", "74": "চুয়াত্তর",
    "75": "পঁচাত্তর", "76": "ছিয়াত্তর", "77": "সাতাত্তর", "78": "আটাত্তর", "79": "ঊনআশি",
    "80": "আশি", "81": "একাশি", "82": "বিরাশি", "83": "তিরাশি", "84": "চুরাশি",
    "85": "পঁচাশি", "86": "ছিয়াশি", "87": "সাতাশি", "88": "আটাশি", "89": "ঊননব্বই",
    "90": "নব্বই", "91": "একানব্বই", "92": "বিরানব্বই", "93": "তিরানব্বই", "94": "চুরানব্বই",
    "95": "পঁচানব্বই", "96": "ছিয়ানব্বই", "97": "সাতানব্বই", "98": "আটানব্বই", "99": "নিরানব্বই",
    "100": "শো",
    "1000": "হাজার",
    "100000": "লাখ",
    "10000000": "কোটি",
    "1000000000": "একশ’ কোটি",
}  # Bengali

num_dict["te"] = {
    "0": "సున్నా", "1": "ఒకటి", "2": "రెండు", "3": "మూడు", "4": "నాలుగు",
    "5": "ఐదు", "6": "ఆరు", "7": "ఏడు", "8": "ఎనిమిది", "9": "తొమ్మిది",
    "10": "పది", "11": "పదకొండు", "12": "పన్నెండు", "13": "పదమూడు", "14": "పద్నాలుగు",
    "15": "పదిహేను", "16": "పదహారు", "17": "పదిహేడు", "18": "పద్దెనిమిది", "19": "పందొమ్మిది",
    "20": "ఇరవై", "21": "ఇరవై ఒకటి", "22": "ఇరవై రెండు", "23": "ఇరవై మూడు", "24": "ఇరవై నాలుగు",
    "25": "ఇరవై ఐదు", "26": "ఇరవై ఆరు", "27": "ఇరవై ఏడు", "28": "ఇరవై ఎనిమిది", "29": "ఇరవై తొమ్మిది",
    "30": "ముప్పై", "31": "ముప్పై ఒకటి", "32": "ముప్పై రెండు", "33": "ముప్పై మూడు", "34": "ముప్పై నాలుగు",
    "35": "ముప్పై ఐదు", "36": "ముప్పై ఆరు", "37": "ముప్పై ఏడు", "38": "ముప్పై ఎనిమిది", "39": "ముప్పై తొమ్మిది",
    "40": "నలభై", "41": "నలభై ఒకటి", "42": "నలభై రెండు", "43": "నలభై మూడు", "44": "నలభై నాలుగు",
    "45": "నలభై ఐదు", "46": "నలభై ఆరు", "47": "నలభై ఏడు", "48": "నలభై ఎనిమిది", "49": "నలభై తొమ్మిది",
    "50": "యాభై", "51": "యాభై ఒకటి", "52": "యాభై రెండు", "53": "యాభై మూడు", "54": "యాభై నాలుగు",
    "55": "యాభై ఐదు", "56": "యాభై ఆరు", "57": "యాభై ఏడు", "58": "యాభై ఎనిమిది", "59": "యాభై తొమ్మిది",
    "60": "అరవై", "61": "అరవై ఒకటి", "62": "అరవై రెండు", "63": "అరవై మూడు", "64": "అరవై నాలుగు",
    "65": "అరవై ఐదు", "66": "అరవై ఆరు", "67": "అరవై ఏడు", "68": "అరవై ఎనిమిది", "69": "అరవై తొమ్మిది",
    "70": "డెబ్బై", "71": "డెబ్బై ఒకటి", "72": "డెబ్బై రెండు", "73": "డెబ్బై మూడు", "74": "డెబ్బై నాలుగు",
    "75": "డెబ్బై ఐదు", "76": "డెబ్బై ఆరు", "77": "డెబ్బై ఏడు", "78": "డెబ్బై ఎనిమిది", "79": "డెబ్బై తొమ్మిది",
    "80": "ఎనభై", "81": "ఎనభై ఒకటి", "82": "ఎనభై రెండు", "83": "ఎనభై మూడు", "84": "ఎనభై నాలుగు",
    "85": "ఎనభై ఐదు", "86": "ఎనభై ఆరు", "87": "ఎనభై ఏడు", "88": "ఎనభై ఎనిమిది", "89": "ఎనభై తొమ్మిది",
    "90": "తొంభై", "91": "తొంభై ఒకటి", "92": "తొంభై రెండు", "93": "తొంభై మూడు", "94": "తొంభై నాలుగు",
    "95": "తొంభై ఐదు", "96": "తొంభై ఆరు", "97": "తొంభై ఏడు", "98": "తొంభై ఎనిమిది", "99": "తొంభై తొమ్మిది",
    "100": "వందల",
    "1000": "వేల",
    "100000": "లక్షల",
    "10000000": "కోట్ల",
    "1000000000": "బిలియన్",
}  # Telugu

num_dict["ta"] = {
    "0": "பூஜ்ஜியம்", "1": "ஒன்று", "2": "இரண்டு", "3": "மூன்று", "4": "நான்கு",
    "5": "ஐந்து", "6": "ஆறு", "7": "ஏழு", "8": "எட்டு", "9": "ஒன்பது",
    "10": "பத்து", "11": "பதினொன்று", "12": "பன்னிரண்டு", "13": "பதிமூன்று", "14": "பதினான்கு",
    "15": "பதினைந்து", "16": "பதினாறு", "17": "பதினேழு", "18": "பதினெட்டு", "19": "பத்தொன்பது",
    "20": "இருபது", "21": "இருபது ஒன்று", "22": "இருபத்து இரண்டு", "23": "இருபத்து மூன்று", "24": "இருபத்து நான்கு",
    "25": "இருபத்து ஐந்து", "26": "இருபத்து ஆறு", "27": "இருபத்து ஏழு", "28": "இருபத்து எட்டு", "29": "இருபத்து ஒன்பது",
    "30": "முப்பது", "31": "முப்பத்து ஒன்று", "32": "முப்பத்து இரண்டு", "33": "முப்பத்து மூன்று", "34": "முப்பத்து நான்கு",
    "35": "முப்பத்து ஐந்து", "36": "முப்பத்து ஆறு", "37": "முப்பத்து ஏழு", "38": "முப்பத்து எட்டு", "39": "முப்பத்து ஒன்பது",
    "40": "நாற்பது", "41": "நாற்பத்து ஒன்று", "42": "நாற்பத்து இரண்டு", "43": "நாற்பத்து மூன்று", "44": "நாற்பத்து நான்கு",
    "45": "நாற்பத்து ஐந்து", "46": "நாற்பத்து ஆறு", "47": "நாற்பத்து ஏழு", "48": "நாற்பத்து எட்டு", "49": "நாற்பத்து ஒன்பது",
    "50": "ஐம்பது", "51": "ஐம்பத்து ஒன்று", "52": "ஐம்பத்து இரண்டு", "53": "ஐம்பத்து மூன்று", "54": "ஐம்பத்து நான்கு",
    "55": "ஐம்பத்து ஐந்து", "56": "ஐம்பத்து ஆறு", "57": "ஐம்பத்து ஏழு", "58": "ஐம்பத்து எட்டு", "59": "ஐம்பத்து ஒன்பது",
    "60": "அறுபது", "61": "அறுபத்து ஒன்று", "62": "அறுபத்து இரண்டு", "63": "அறுபத்து மூன்று", "64": "அறுபத்து நான்கு",
    "65": "அறுபத்து ஐந்து", "66": "அறுபத்து ஆறு", "67": "அறுபத்து ஏழு", "68": "அறுபத்து எட்டு", "69": "அறுபத்து ஒன்பது",
    "70": "எழுபது", "71": "எழுபத்தி ஒன்று", "72": "எழுபத்தி இரண்டு", "73": "எழுபத்தி மூன்று", "74": "எழுபத்தி நான்கு",
    "75": "எழுபத்தி ஐந்து", "76": "எழுபத்தி ஆறு", "77": "எழுபத்தி ஏழு", "78": "எழுபத்தி எட்டு", "79": "எழுபத்தி ஒன்பது",
    "80": "எண்பது", "81": "எண்பத்தியொன்று", "82": "எண்பத்திரண்டு", "83": "எண்பத்திமூன்று", "84": "என்பதினான்கு",
    "85": "என்பதினைந்து", "86": "எண்பத்திஆறு", "87": "எண்பத்திஏழு", "88": "எண்பத்தியெட்டு", "89": "எண்பத்தியொன்பது",
    "90": "தொன்னூறு", "91": "தொண்ணூற்றியொன்று", "92": "தொண்ணூற்றிரண்டு", "93": "தொண்ணூற்றிமூன்று", "94": "தொண்ணூற்றிநான்கு",
    "95": "தொண்ணூற்றிஐந்து", "96": "தொண்ணூற்றியாறு", "97": "தொண்ணூற்றியேழு", "98": "தொண்ணூற்றியெட்டு", "99": "தொண்ணூற்றிஒன்பது",
    "100": "நூறு",
    "1000": "ஆயிரம்",
    "100000": "இலட்சம்",
    "10000000": "கோடி",
    "1000000000": "பில்லியன்",
}  # Tamil

num_dict["kn"] = {
    "0": "ಸೊನ್ನೆ", "1": "ಒಂದು", "2": "ಎರಡು", "3": "ಮೂರು", "4": "ನಾಲ್ಕು",
    "5": "ಅಯ್ದು", "6": "ಆರು", "7": "ಏಳು", "8": "ಎಂಟು", "9": "ಒಂಬತ್ತು",
    "10": "ಹತ್ತು", "11": "ಹನ್ನೊಂದು", "12": "ಹನ್ನೆರಡು", "13": "ಹದಿಮೂರು", "14": "ಹದಿನಾಲ್ಕು",
    "15": "ಹದಿನೈದು", "16": "ಹದಿನಾರು", "17": "ಹದಿನೇಳು", "18": "ಹದಿನೆಂಟು", "19": "ಹತ್ತೊಂಬತ್ತು",
    "20": "ಇಪ್ಪತ್ತು", "21": "ಇಪ್ಪತ್ತ್’ಒಂದು", "22": "ಇಪ್ಪತ್ತ್’ಎರಡು", "23": "ಇಪ್ಪತ್ತ್’ಮೂರು", "24": "ಇಪ್ಪತ್ತ್’ನಾಲ್ಕು",
    "25": "ಇಪ್ಪತ್ತ್’ಐದು", "26": "ಇಪ್ಪತ್ತ್’ಆರು", "27": "ಇಪ್ಪತ್ತ್’ಏಳು", "28": "ಇಪ್ಪತ್ತ್’ಎಂಟು", "29": "ಇಪ್ಪತ್ತ್’ಒಂಬತ್ತು",
    "30": "ಮೂವತ್ತು", "31": "ಮುವತ್ತ್’ಒಂದು", "32": "ಮುವತ್ತ್’ಎರಡು", "33": "ಮುವತ್ತ್’ಮೂರು", "34": "ಮೂವತ್ತ್’ನಾಲ್ಕು",
    "35": "ಮೂವತ್ತ್’ಐದು", "36": "ಮೂವತ್ತ್’ಆರು", "37": "ಮೂವತ್ತ್’ಏಳು", "38": "ಮೂವತ್ತ್’ಎಂಟು", "39": "ಮೂವತ್ತ್’ಒಂಬತ್ತು",
    "40": "ನಲವತ್ತು", "41": "ನಲವತ್ತೊಂದು", "42": "ನಲವತ್ತ್ ಎರಡು", "43": "ನಲವತ್ತ್ ಮೂರು", "44": "ನಲವತ್ತ್ ನಾಲ್ಕು",
    "45": "ನಲವತ್ತೈದು", "46": "ನಲವತ್ತಾರು", "47": "ನಲವತ್ತೇಳು", "48": "ನಲವತ್ತೆಂಟು", "49": "ನಲವತ್ತೊಂಬತ್ತು",
    "50": "ಐವತ್ತು", "51": "ಐವತ್ತೊಂದು", "52": "ಐವತ್ತೆರಡು", "53": "ಐವತ್ತಮೂರು", "54": "ಐವತ್ತ್ನಾಲ್ಕು",
    "55": "ಐವತ್ತೈದು", "56": "ಐವತ್ತಾರು", "57": "ಐವತ್ತೇಳು", "58": "ಐವತ್ತೆಂಟು", "59": "ಐವತ್ತೊಂಬತ್ತು",
    "60": "ಅರವತ್ತು", "61": "ಅರವತ್ತೊಂದು", "62": "ಅರವತ್ತೆರಡು", "63": "ಅರವತ್ತ್ ಮೂರು", "64": "ಅರವತ್ತ್ ನಾಲ್ಕು",
    "65": "ಅರವತ್ತೈದು", "66": "ಅರವತ್ತಾರು", "67": "ಅರವತ್ತೇಳು", "68": "ಅರವತ್ತೆಂಟು", "69": "ಅರವತ್ತೊಂಬತ್ತು",
    "70": "ಎಪ್ಪತ್ತು", "71": "ಎಪ್ಪತ್ತೊಂದು", "72": "ಎಪ್ಪತ್ತೆರಡು", "73": "ಎಪ್ಪತ್ತ್ ಮೂರು", "74": "ಎಪ್ಪತ್ತ್ ನಾಲ್ಕು",
    "75": "ಎಪ್ಪತ್ತೈದು", "76": "ಎಪ್ಪತ್ತಾರು", "77": "ಎಪ್ಪತ್ತೇಳು", "78": "ಎಪ್ಪತ್ತೆಂಟು", "79": "ಎಪ್ಪತ್ತೊಂಬತ್ತು",
    "80": "ಎಂಬತ್ತು", "81": "ಎಂಬತ್ತೊಂದು", "82": "ಎಂಬತ್ತೆರಡು", "83": "ಎಂಬತ್ತ್ ಮೂರು", "84": "ಎಂಬತ್ತ್ ನಾಲ್ಕು",
    "85": "ಎಂಬತ್ತೈದು", "86": "ಎಂಬತ್ತಾರು", "87": "ಎಂಬತ್ತೇಳು", "88": "ಎಂಬತ್ತೆಂಟು", "89": "ಎಂಬತ್ತೊಂಬತ್ತು",
    "90": "ತೊಂಬತ್ತು", "91": "ತೊಂಬತ್ತೊಂದು", "92": "ತೊಂಬತ್ತೆರಡು", "93": "ತೊಂಬತ್ತ ಮೂರು", "94": "ತೊಂಬತ್ತ ನಾಲ್ಕು",
    "95": "ತೊಂಬತ್ತೈದು", "96": "ತೊಂಬತ್ತಾರು", "97": "ತೊಂಬತ್ತೇಳು", "98": "ತೊಂಬತ್ತೆಂಟು", "99": "ತೊಂಬತ್ತೊಂಬತ್ತು",
    "100": "ನೂರ",
    "1000": "ಸಾವಿರದ",
    "100000": "ಲಕ್ಷದ",
    "10000000": "ಕೋಟಿ",
    "1000000000": "ಶತಕೋಟಿ",
}  # Kannada

num_dict["or"] = {
    "0": "ଶୁନ୍ୟ", "1": "ଏକ", "2": "ଦୁଇ", "3": "ତିନି", "4": "ଚାରି",
    "5": "ପାଞ୍ଚ", "6": "ଛଅ", "7": "ସାତ", "8": "ଆଠ", "9": "ନଅ",
    "10": "ଦଶ", "11": "ଏଗାର", "12": "ବାର", "13": "ତେର", "14": "ଚଉଦ",
    "15": "ପନ୍ଦର", "16": "ଷୋହଳ", "17": "ସତର", "18": "ଅଠର", "19": "ଊଣାଇଶ",
    "20": "କୋଡିଏ", "21": "ଏକୋଇଶି", "22": "ବାଇଶି", "23": "ତେଇଶି", "24": "ଚବିଶି",
    "25": "ପଚିଶି", "26": "ଛବିଶି", "27": "ସତାଇଶି", "28": "ଅଠାଇଶି", "29": "ଅଣତିରିଶି",
    "30": "ତିରିଶି", "31": "ଏକତିରିଶି", "32": "ବତିଶି", "33": "ତେତିଶି", "34": "ଚଉତିରିଶି",
    "35": "ପଞ୍ଚତିରିଶି", "36": "ଛତିଶି", "37": "ସଂଇତିରିଶି", "38": "ଅଠତିରିଶି", "39": "ଅଣଚାଳିଶି",
    "40": "ଚାଳିଶି", "41": "ଏକଚାଳିଶି", "42": "ବୟାଳିଶି", "43": "ତେୟାଳିଶି", "44": "ଚଉରାଳିଶି",
    "45": "ପଞ୍ଚଚାଳିଶି", "46": "ଛୟାଳିଶି", "47": "ସତଚାଳିଶି", "48": "ଅଠଚାଳିଶି", "49": "ଅଣଚାଶ",
    "50": "ପଚାଶ", "51": "ଏକାବନ", "52": "ବାଉନ", "53": "ତେପନ", "54": "ଚଉବନ",
    "55": "ପଞ୍ଚାବନ", "56": "ଛପନ", "57": "ସତାବନ", "58": "ଅଠାବନ", "59": "ଅଣଷଠି",
    "60": "ଷାଠିଏ", "61": "ଏକଷଠି", "62": "ବାଷଠି", "63": "ତେଷଠି", "64": "ଚଉଷଠି",
    "65": "ପଞ୍ଚଷଠି", "66": "ଛଅଷଠି", "67": "ସତଷଠି", "68": "ଅଠଷଠି", "69": "ଅଣସ୍ତରୀ",
    "70": "ସତୂରୀ", "71": "ଏକସ୍ତରୀ", "72": "ବାସ୍ତରୀ", "73": "ତେସ୍ତରୀ", "74": "ଚଉସ୍ତରୀ",
    "75": "ପଞ୍ଚସ୍ତରୀ", "76": "ଛଅସ୍ତରୀ", "77": "ସତସ୍ତରୀ", "78": "ଅଠସ୍ତରୀ", "79": "ଅଣାଅଶୀ",
    "80": "ଅଶୀ", "81": "ଏକାଅଶୀ", "82": "ବୟାଅଶୀ", "83": "ତେୟାଅଶୀ", "84": "ଚଉରାଅଶୀ",
    "85": "ପଞ୍ଚାଅଶୀ", "86": "ଛୟାଅଶୀ", "87": "ସତାଅଶୀ", "88": "ଅଠାଅଶୀ", "89": "ଅଣାନବେ",
    "90": "ନବେ", "91": "ଏକାନବେ", "92": "ବୟାନବେ", "93": "ତେୟାନବେ", "94": "ଚଉରାନବେ",
    "95": "ପଞ୍ଚାନବେ", "96": "ଛୟାନବେ", "97": "ସତାନବେ", "98": "ଅଠାନବେ", "99": "ଅନେଶତ",
    "100": "ଶହେ",
    "1000": "ହଜାର",
    "100000": "ଲକ୍ଷ",
    "10000000": "କୋଟି",
    "1000000000": "କୋଟି",
}  # Oriya

num_dict["pa"] = {
    "0": "ਸਿਫਰ", "1": "ਇੱਕ", "2": "ਦੋ", "3": "ਤਿੰਨ", "4": "ਚਾਰ",
    "5": "ਪੰਜ", "6": "ਛੇ", "7": "ਸੱਤ", "8": "ਅੱਠ", "9": "ਨੌਂ",
    "10": "ਦੱਸ", "11": "ਗਿਆਰਾਂ", "12": "ਬਾਰਾਂ", "13": "ਤੇਰਾਂ", "14": "ਚੌਦਾਂ",
    "15": "ਪੰਦਰਾਂ", "16": "ਸੋਲ਼ਾਂ", "17": "ਸਤਾਰਾਂ", "18": "ਅਠਾਰਾਂ", "19": "ਉਨੀ",
    "20": "ਵੀਹ", "21": "ਇੱਕੀ", "22": "ਬਾਈ", "23": "ਤੇਈ", "24": "ਚੌਵੀ",
    "25": "ਪੰਝੀ", "26": "ਛੱਬੀ", "27": "ਸਤਾਈ", "28": "ਅਠਾਈ", "29": "ਉਨੱਤੀ",
    "30": "ਤੀਹ", "31": "ਇਕੱਤੀ", "32": "ਬੱਤੀ", "33": "ਤੇਤੀ", "34": "ਚੌਂਤੀ",
    "35": "ਪੈਂਤੀ", "36": "ਛੱਤੀ", "37": "ਸੈਂਤੀ", "38": "ਅਠੱਤੀ", "39": "ਉਨਤਾਲੀ",
    "40": "ਚਾਲੀ", "41": "ਇਕਤਾਲੀ", "42": "ਬਤਾਲੀ", "43": "ਤਰਤਾਲੀ", "44": "ਚੌਤਾਲੀ",
    "45": "ਪੰਜਤਾਲੀ", "46": "ਛਿਆਲੀ", "47": "ਸੰਤਾਲੀ", "48": "ਅੱਠਤਾਲੀ", "49": "ਉਣਿੰਜਾ",
    "50": "ਪੰਜਾਹ", "51": "ਇਕਵਿੰਜਾ", "52": "ਬਵਿੰਜਾ", "53": "ਤਰਵਿੰਜਾ", "54": "ਚਰਿੰਜਾ",
    "55": "ਪਚਵਿੰਜਾ", "56": "ਛਪਿੰਜਾ", "57": "ਸਤਵਿੰਜਾ", "58": "ਅੱਠਵਿੰਜਾ", "59": "ਉਣਾਠ",
    "60": "ਸੱਠ", "61": "ਇਕਾਠ", "62": "ਬਾਠ੍ਹ", "63": "ਤਰੇਠ੍ਹ", "64": "ਚੌਠ੍ਹ",
    "65": "ਪੈਂਠ", "66": "ਛਿਆਠ", "67": "ਸਤਾਹਠ", "68": "ਅੱਠਾਠ", "69": "ਉਣੱਤਰ",
    "70": "ਸੱਤਰ", "71": "ਇਕ੍ਹੱਤਰ", "72": "ਬਹੱਤਰ", "73": "ਤਹੱਤਰ", "74": "ਚੌਹੱਤਰ",
    "75": "ਪੰਜੱਤਰ", "76": "ਛਿਹੱਤਰ", "77": "ਸਤੱਤਰ", "78": "ਅਠੱਤਰ", "79": "ਉਣਾਸੀ",
    "80": "ਅੱਸੀ", "81": "ਇਕਾਸੀ", "82": "ਬਿਆਸੀ", "83": "ਤਰਾਸੀ", "84": "ਚਰਾਸੀ",
    "85": "ਪੰਜਾਸੀ", "86": "ਛਿਆਸੀ", "87": "ਸਤਾਸੀ", "88": "ਅਠਾਸੀ", "89": "ਉਣਾਨਵੇਂ",
    "90": "ਨੱਬੇ", "91": "ਇਕਾਨਵੇਂ", "92": "ਬਿਆਨਵੇਂ", "93": "ਤਰਾਨਵੇਂ", "94": "ਚਰਾਨਵੇਂ",
    "95": "ਪਚਾਨਵੇਂ", "96": "ਛਿਆਨਵੇਂ", "97": "ਸਤਾਨਵੇਂ", "98": "ਅਠਾਨਵੇਂ", "99": "ਨਿੜਾਨਵੇਂ",
    "100": "ਸੌ",
    "1000": "ਹਜਾਰ",
    "100000": "ਲੱਖ",
    "10000000": "ਕਰੋੜ",
    "1000000000": "ਅਰਬ",
}  # Punjabi

# --------------------------- num_to_word.py ------------------------------
# Convert numbers to words for Indian languages.
#
# Use cases:
#   1) Speech-recognition pre-processing
#   2) Language-modelling data pre-processing
#
# To add support for another language, extend supported_lang, all_num and
# num_dict above.


def language_specific_exception(words, lang, combiner):
    """Apply per-language wording fixes to an already generated number phrase.

    :param words: phrase produced by num_to_word before any correction
    :param lang: lower-case language code; only "mr", "gu", "te" and "kn"
        have rules, every other code returns ``words`` unchanged
    :param combiner: string that was placed between a count and its
        place-value word when ``words`` was built
    :return: the corrected phrase
    """

    def occurs_at_end(piece):
        return words.endswith(piece)

    if lang == "mr":
        words = words.replace("एक" + combiner + "शे", "शंभर")
    elif lang == "gu":
        words = words.replace("બે" + combiner + "સો", "બસ્સો")
    elif lang == "te":
        exception_dict = {
            "1": "ఒక",
            "100": "వంద",
            "100+": "వందలు",
            "1000": "వెయ్యి",
            "1000+": "వేలు",
            "100000": "లక్ష",
            "100000+": "లక్షలు",
            "10000000": "కోటి",
            "10000000+": "కోట్లు",
        }
        for test in ("100", "1000", "100000", "10000000"):
            test_word = num_dict["te"][test]
            match = num_dict["te"]["1"] + combiner + test_word
            if words == match:
                # Numbers like 100, 1000, 100000: a single word replaces
                # "one <place value>".
                return exception_dict[test]
            if occurs_at_end(test_word):
                # Numbers like 200, 4000, 800000: the phrase ends in the
                # place-value word, which takes the "+" form.
                words = words.replace(test_word, exception_dict[test + "+"])
            else:
                # Numbers like 105, 1076, 123993. The phrase does not end in
                # test_word here, so it cannot end in match either.
                replacement = exception_dict["1"] + combiner + exception_dict[test]
                words = words.replace(match, replacement)

        # Exception case for 101...199
        special_case = "ఒక" + combiner + "వంద"
        words = words.replace(special_case, "నూట")
    elif lang == "kn":
        # Special case for exactly 100.
        if words == ("ಒಂದು" + combiner + "ನೂರ"):
            return "ನೂರು"
        exception_dict = {
            "ನೂರ": "ನೂರು",
            "ಸಾವಿರದ": "ಸಾವಿರ",
            "ಲಕ್ಷದ": "ಲಕ್ಷ",
            "ಕೋಟಿಯ": "ಕೋಟಿ",
        }
        for expt, replacement in exception_dict.items():
            if occurs_at_end(expt):
                words = words.replace(expt, replacement)
    return words


def num_to_word(num, lang, separator=", ", combiner=" "):
    """Spell out a non-negative integer in the words of ``lang``.

    :param num: number as int or str; digits may be ASCII or the native
        digits of any supported language, and "," / " " grouping is ignored
    :param lang: language code from ``supported_lang`` (case-insensitive)
    :param separator: string placed between place-value groups,
        i.e. separator = '-' --> 'two hundred-sixty'
    :param combiner: string placed between a count and its place-value word,
        i.e. combiner = '-' --> 'two-hundred sixty'. For "en" a combiner of
        " " is replaced by "-".
    :return: the number in words. Input that is not purely decimal digits
        after removing "," and " " is returned (with those removed) as is.
        Numbers of up to nine digits are read by place value; longer ones are
        read out two digits at a time.
    :raises AssertionError: if ``lang`` is not supported, or a digit belongs
        to a script that is not in ``all_num``
    """
    lang = lang.lower()

    # Raised explicitly (not via ``assert``) so that validation still runs
    # under ``python -O``, while keeping the exception type callers may catch.
    if lang not in supported_lang:
        raise AssertionError("Language not supported")
    num_dic = num_dict[lang]

    # Dash is the default combiner for english-india.
    if lang == "en" and combiner == " ":
        combiner = "-"

    # Remove grouping punctuation from the number.
    num = str(num).replace(",", "").replace(" ", "")

    # Return the word as it is if it is not a number.
    if not num.isdecimal():
        return num

    # Replace native-language digits with ASCII digits.
    num = num.translate(_NATIVE_TO_ASCII_DIGIT)
    if any(digit not in all_num["en"] for digit in num):
        raise AssertionError("Give proper input")

    def all_two_digit(digits_2):
        """Read a one- or two-digit chunk, speaking leading zeros aloud."""
        if len(digits_2) <= 1:  # one digit, or nothing at all
            return num_dic.get(digits_2, "")
        if digits_2 == "00":
            return num_dic["0"] + separator + num_dic["0"]
        if digits_2[0] == "0":
            return num_dic["0"] + separator + num_dic[digits_2[1]]
        return num_dic[digits_2]

    def place_value_parts(digits):
        """Split up to nine digits into Indian place-value groups, in words."""
        digits = digits.lstrip("0")
        parts = []
        while len(digits) > 3:
            digit_len = len(digits)
            # Above the hundreds every group is two digits wide, so an odd
            # length leaves a two-digit leading group and an even length a
            # one-digit one.
            lead = (digit_len % 2) + 1
            base = str(10 ** ((digit_len // 2) * 2 - 1))
            parts.append(num_dic[digits[:lead]] + combiner + num_dic[base])
            digits = digits[lead:].lstrip("0")
        if len(digits) == 3:
            parts.append(num_dic[digits[0]] + combiner + num_dic["100"])
            digits = digits[1:].lstrip("0")
        if digits:
            parts.append(num_dic[digits])
        return parts

    num = num.lstrip("0")
    if not num:
        output = num_dic["0"]
    elif len(num) <= 9:
        # Joining only the non-empty groups means no trailing separator is
        # ever produced, so nothing has to be stripped afterwards.
        output = separator.join(place_value_parts(num))
    else:
        # Too long for the place-value names: read the digits out in pairs,
        # with a final single digit when the length is odd.
        pairs = (num[start : start + 2] for start in range(0, len(num), 2))
        output = separator.join(all_two_digit(pair) for pair in pairs)

    return language_specific_exception(output, lang, combiner)


# --------------------------------- num_to_word_on_a_sent ---------------------------------


def is_digit(word, digit_pattern):
    """Return the first regex match of ``digit_pattern`` in ``word``, or None."""
    return re.search(digit_pattern, word)


def remove_punct(sent):
    """Replace ASCII punctuation in ``sent`` with spaces and collapse whitespace."""
    clean = re.sub("[%s]" % re.escape(string.punctuation), " ", sent)
    return " ".join(clean.split())


def normalize_nums(text, lang):
    """Replace every whitespace-separated token containing a digit with words.

    :param text: input sentence
    :param lang: language code, e.g. 'en' or 'hi'
    :return: the sentence with number tokens spelled out and whitespace
        collapsed to single spaces; ``text`` unchanged if ``lang`` is not in
        ``supported_lang``

    Hyphens are spaced out first so that both sides of "5-6" are converted.
    Only tokens containing an ASCII digit are passed to num_to_word.
    """
    if lang not in supported_lang:
        return text

    digit_pattern = "[0123456789]"
    words = text.replace("-", " - ").split()  # space-separate hyphens
    return " ".join(
        num_to_word(word, lang, separator=" ", combiner=" ")
        if is_digit(word, digit_pattern)
        else word
        for word in words
    )


if __name__ == "__main__":
    print(normalize_nums("रीटा के पास 16 बिल्लियाँ हैं।", "hi"))