Spaces:
Running
Running
def number_to_thai_text(num, digit_by_digit=False):
    """Return the Thai reading of the integer *num*.

    Args:
        num: Integer to convert. Negative values are read with a leading
            "ลบ" (minus).
        digit_by_digit: When True, each decimal digit of *num* is read on
            its own and the words are joined with single spaces, instead
            of reading the value with place names.

    Returns:
        The Thai text. In the default mode the words are concatenated
        without separators, as Thai is normally written.
    """
    # Thai digit words and the place names for 10^0 .. 10^6
    thai_digits = {
        0: "ศูนย์", 1: "หนึ่ง", 2: "สอง", 3: "สาม", 4: "สี่",
        5: "ห้า", 6: "หก", 7: "เจ็ด", 8: "แปด", 9: "เก้า",
    }
    thai_places = ["", "สิบ", "ร้อย", "พัน", "หมื่น", "แสน", "ล้าน"]

    # Zero is the only value whose "ศูนย์" is spoken in place-value mode
    if num == 0:
        return thai_digits[0]

    # str(num) of a negative value starts with "-", which int() cannot
    # parse on its own; read the sign explicitly and convert the magnitude.
    if num < 0:
        separator = " " if digit_by_digit else ""
        return "ลบ" + separator + number_to_thai_text(-num, digit_by_digit)

    if digit_by_digit:
        return " ".join(thai_digits[int(d)] for d in str(num))

    # Values of a million or more are read as "<millions>ล้าน<remainder>",
    # recursing so that arbitrarily large values work.
    if num >= 1000000:
        millions, remainder = divmod(num, 1000000)
        text = number_to_thai_text(millions) + "ล้าน"
        if remainder > 0:
            text += number_to_thai_text(remainder)
        return text

    # Below one million: walk the digits from the units place upwards so
    # the index is also the index into thai_places.
    digits = [int(d) for d in str(num)]
    digits.reverse()

    # A units digit of 1 is read "เอ็ด" after ANY non-zero tens digit
    # (11, 21, 31, ... 91), not only after 1 and 2.
    # Numbers whose tens digit is zero (e.g. 101) keep "หนึ่ง".
    has_tens = len(digits) > 1 and digits[1] != 0

    words = []
    for place, digit in enumerate(digits):
        if digit == 0:
            continue  # zeros are silent inside a number
        if place == 1:
            if digit == 1:
                word = thai_places[place]            # 10-19: "สิบ", not "หนึ่งสิบ"
            elif digit == 2:
                word = "ยี่" + thai_places[place]     # 20-29: "ยี่สิบ", not "สองสิบ"
            else:
                word = thai_digits[digit] + thai_places[place]
        elif place == 0 and digit == 1 and has_tens:
            word = "เอ็ด"
        else:
            word = thai_digits[digit] + thai_places[place]
        words.append(word)

    # Words were collected units-first; speak them highest place first
    words.reverse()
    return "".join(words)
def replace_numbers_with_thai(text):
    """Replace the numbers in *text* with their Thai reading.

    The text is split on whitespace and every token is handled on its own;
    the converted tokens are joined back with single spaces, so runs of
    whitespace in the input collapse to one space.

    * A token that is a plain number (digits, optional thousands commas,
      optional decimal fraction) is read as a value. If its integer part
      has more than 7 digits it is read digit by digit instead. The
      fraction is always read digit by digit after "จุด".
    * A token that mixes digits with other characters has each run of
      digits read digit by digit; the other characters are kept as they
      are, separated from the digit words by a space.
    * Tokens without digits are returned unchanged.

    Args:
        text: Input string.

    Returns:
        The text with numbers spelled out in Thai.
    """
    import re

    # Longest integer part that is still read as a value with place names
    max_value_digits = 7
    plain_number = re.compile(r'^[\d,]+(\.\d+)?$')
    # Alternating runs of decimal digits / anything else.
    # Unlike str.isdigit(), \d only matches characters int() can parse,
    # so tokens such as "m²" no longer raise ValueError.
    digit_or_other_run = re.compile(r'\d+|\D+')

    def read_digits(digit_str):
        # Read from the string, not from int(): leading zeros must survive
        # (phone numbers, IDs, the fractional part of a decimal).
        return " ".join(number_to_thai_text(int(d)) for d in digit_str)

    def read_integer(digit_str):
        if len(digit_str) > max_value_digits:
            return read_digits(digit_str)
        # An empty integer part (".5") is read as zero
        return number_to_thai_text(int(digit_str) if digit_str else 0)

    def convert_number(token):
        num_str = token.replace(',', '')
        # A token made only of commas is not a number; leave it untouched
        if not num_str:
            return token
        integer_part, _, decimal_part = num_str.partition('.')
        spoken = read_integer(integer_part)
        if decimal_part:
            spoken += "จุด " + read_digits(decimal_part)
        return spoken

    def convert_mixed(token):
        # Keep every non-digit run intact: inserting a space after each
        # character would tear Thai combining vowels and tone marks away
        # from their consonants.
        pieces = [
            read_digits(run) if run[0].isdecimal() else run
            for run in digit_or_other_run.findall(token)
        ]
        return " ".join(pieces)

    converted = []
    for word in text.split():
        if plain_number.match(word):
            converted.append(convert_number(word))
        elif re.search(r'\d', word):
            converted.append(convert_mixed(word))
        else:
            converted.append(word)
    return " ".join(converted)
# Manual smoke test: runs only when the file is executed as a script
if __name__ == "__main__":
    # Plain integers, printed with thousands separators next to their reading
    for sample in (1, 12, 500, 6450, 100000, 12345678):
        print(f"{sample:,} -> {number_to_thai_text(sample)}")

    # Sentences containing integers, a decimal and space-separated groups
    sample_sentences = (
        "ฉันมีเงิน 500 บาท",
        "ราคา 123.45 บาท",
        "บ้านเลขที่ 12 34",
        "วันที่ 15 08 2023",
    )
    for sentence in sample_sentences:
        converted = replace_numbers_with_thai(sentence)
        print(f"\nOriginal: {sentence}")
        print(f"Converted: {converted}")