File size: 2,852 Bytes
79be08a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from pythainlp.util import text_to_num, text_to_arabic_digit

class ThaiWord:
    """Rewrite runs of Thai number words in a token list as Arabic numerals.

    The entry point is :meth:`pretty`, which scans a sequence of tokens,
    groups consecutive number words and replaces each group with a
    comma-grouped integer produced by :meth:`words_to_number`.
    """

    def __init__(self) -> None:
        # Words for the digits one through nine.
        self.word_number = ['หนึ่ง','สอง','สาม','สี่','ห้า','หก','เจ็ด','แปด','เก้า']
        # Place-value (unit) words: ten, hundred, thousand, ten thousand,
        # hundred thousand, million.
        self.word_digit = ['สิบ','ร้อย','พัน','หมื่น','แสน','ล้าน']
        # Irregular digit words: index 0 is matched by iscontains11 (the "one"
        # used in 11, 21, ...), index 1 by iscontains2x (the "two" used in 20).
        self.word_number_specific = ['เอ็ด', 'ยี่']
        # Not referenced by any method in this class; kept for external users.
        self.word_digit_specific = ['สิบ']

    def iscontains11(self, word) -> bool:
        """Return True if ``word`` starts or ends with the irregular "one" word."""
        irregular_one = self.word_number_specific[0]
        return word.endswith(irregular_one) or word.startswith(irregular_one)

    def iscontains2x(self, word) -> bool:
        """Return True if ``word`` starts with the irregular "two" word."""
        return word.startswith(self.word_number_specific[1])

    def words_to_number(self, words) -> str:
        """Convert a group of Thai number words to a formatted integer string.

        Args:
            words: Sequence of string tokens that together spell one number.

        Returns:
            The integer with thousands separators, padded with one space on
            each side (e.g. ``' 1,000 '``). If ``words`` is a single
            place-value word, or no integer can be derived from it, the
            original text is returned unchanged instead.
        """
        # A lone place-value word is not a number by itself, so hand the text
        # back. (Previously this path fell through to int() on a list and
        # always raised TypeError.)
        if len(words) == 1 and words[0] in self.word_digit:
            return words[0]

        original = "".join(words)
        try:
            parsed = text_to_num(original)
            # Only the first element of the result is used as the number.
            digits = parsed[0] if len(parsed) > 0 else ''
        except Exception:
            # Best-effort fallback: translate word by word and concatenate.
            digits = "".join(f'{text_to_arabic_digit(word)}' for word in words)

        try:
            value = int(digits)
        except (TypeError, ValueError):
            # Nothing usable was produced; keep the source text rather than crash.
            return original

        return f' {value:,} '

    def pretty(self, words) -> str:
        """Join ``words`` into one string, replacing number-word runs by numerals.

        A run starts at a token accepted by :meth:`is_start_number` and is
        extended while following tokens satisfy :meth:`is_number` or
        :meth:`is_digit`. Each finished run is passed to
        :meth:`words_to_number`. All pieces are concatenated with no separator.

        Args:
            words: Sequence of string tokens.

        Returns:
            The concatenated text.
        """
        pending = []  # tokens of the number run currently being collected
        pieces = []

        for word in words:
            if pending:
                if self.is_number(word) or self.is_digit(word):
                    pending.append(word)
                    continue
                # The run ended on this token: emit it, then handle the token
                # itself below (it may be plain text or start a new run).
                pieces.append(self.words_to_number(pending))
                pending = []

            if self.is_start_number(word):
                pending.append(word)
            else:
                pieces.append(word)

        # Emit a run that reaches the end of the input.
        if pending:
            pieces.append(self.words_to_number(pending))

        return ''.join(pieces)

    def is_start_number(self, word) -> bool:
        """Return True if ``word`` may begin a run of number words."""
        return (
            word in self.word_number
            or word in self.word_digit
            or self.iscontains2x(word)
            or self.iscontains11(word)
        )

    def is_digit(self, word) -> bool:
        """Return True if ``word`` is one of the place-value words."""
        return word in self.word_digit

    def is_number(self, word) -> bool:
        """Return True if ``word`` is a digit word, regular or irregular."""
        return (
            word in self.word_number
            or word in self.word_number_specific
            or self.iscontains11(word)
        )