truyen-ngontinh / preprocess.py
tu
add app
6f6918a
raw
history blame
428 Bytes
# import VietnameseTextNormalizer
def preprocess(text: str) -> str:
    """Lowercase *text* and space out its punctuation, one line at a time.

    The input is lowercased and split on ``"\\n"``. Within each line every
    ``.``, ``!`` and ``?`` becomes ``" . "`` and every ``,`` becomes
    ``" , "``; the line is then stripped of leading/trailing whitespace.
    The processed lines are finally joined with ``" . "``, so each original
    line break is rendered as a period token.

    Runs of spaces produced by the substitutions (for example the two
    spaces in ``"a ,  b"`` for input ``"a, b"``) are deliberately left
    untouched, and empty lines are kept as empty segments.

    Args:
        text: Raw input text, possibly spanning several lines.

    Returns:
        A single-line string with punctuation separated by spaces.
    """
    # NOTE: an earlier variant passed the lowercased text through
    # VietnameseTextNormalizer.Normalize before splitting; that step is
    # currently disabled.

    # One translate pass replaces the chained str.replace calls. The result
    # is the same because replacement text is never re-scanned.
    punctuation_table = str.maketrans(
        {".": " . ", "!": " . ", "?": " . ", ",": " , "}
    )
    lines = [
        line.translate(punctuation_table).strip()
        for line in text.lower().split("\n")
    ]
    return " . ".join(lines)