File size: 428 Bytes
6f6918a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# import VietnameseTextNormalizer

def preprocess(text):
    """Lowercase *text* and space out its punctuation, one line at a time.

    The input is lowercased and split on newlines. Within each line every
    ".", "!" and "?" becomes " . " (so all three end up as a period) and
    every "," becomes " , "; the line is then stripped of surrounding
    whitespace. The processed lines are joined back together with " . ".

    Returns the resulting single string.
    """
    # NOTE: the VietnameseTextNormalizer.Normalize step is currently disabled
    # (its import is commented out at the top of the file).
    # None of the replacement strings contains a character that another
    # rule rewrites, so a single translate pass matches chained replaces.
    punctuation_table = str.maketrans({
        ".": " . ",
        "!": " . ",
        "?": " . ",
        ",": " , ",
    })
    return ' . '.join(
        row.translate(punctuation_table).strip()
        for row in text.lower().split("\n")
    )