hoang1007's picture
Upload 69 files
44db343
raw
history blame
No virus
461 Bytes
import re
from normalize import chuan_hoa_dau_tu_tieng_viet
import numpy as np
# Build the old-style -> new-style tone-mark mapping resource.
#
# Reads one syllable per line from the common-syllables list, runs each one
# through chuan_hoa_dau_tu_tieng_viet, and records every syllable whose
# normalized form differs from the original spelling.
# NOTE(review): judging by the helper and output-file names, the helper
# presumably standardizes Vietnamese tone-mark placement -- confirm against
# the normalize module.

# Vietnamese text is non-ASCII, so the encoding is pinned explicitly instead
# of relying on the platform's locale default.
with open("common-vietnamese-syllables.txt", "r", encoding="utf-8") as src:
    # Iterate the file lazily; only the line terminator is removed so any
    # other whitespace in a line is preserved exactly as in the input.
    vi_syllables = [line.strip("\n") for line in src]

# The context manager guarantees the output is flushed and closed even if the
# normalizer raises part-way through the loop. Plain "w" is sufficient because
# the file is only ever written, never read back.
with open(
    "../../dataset/noising_resources/kieu_go_dau_cu_moi.txt",
    "w",
    encoding="utf-8",
) as out:
    for syllable in vi_syllables:
        normalized = chuan_hoa_dau_tu_tieng_viet(syllable)
        # Only syllables that the normalizer actually changes are emitted.
        if normalized != syllable:
            # Output line format: "<normalized> <original>" (space-separated).
            print(normalized, syllable, file=out)