# NOTE: an optional `VietnameseTextNormalizer.Normalize(...)` pass over the
# lower-cased text was previously wired in here but is disabled; the module is
# intentionally not imported.

# Sentence-ending marks ('.', '!', '?') all collapse to a space-padded '.',
# and commas become a space-padded ','. None of the replacement strings
# contains a character that is itself a key, so a single translate pass gives
# the same result as replacing the marks one after another.
_PUNCT_TABLE = str.maketrans({
    ".": " . ",
    "!": " . ",
    "?": " . ",
    ",": " , ",
})


def preprocess(text: str) -> str:
    """Lower-case *text* and turn its punctuation into standalone tokens.

    The input is lower-cased and split on ``"\\n"``. Within each line every
    ``.``, ``!`` and ``?`` is replaced by ``" . "`` and every ``,`` by
    ``" , "``; the line is then stripped of leading/trailing whitespace.
    The processed lines are joined with ``" . "``.

    Runs of spaces created by the padding are not collapsed, and blank input
    lines are kept as empty segments (so ``"a\\n\\nb"`` yields ``"a .  . b"``).

    Args:
        text: Raw input string, possibly spanning several lines.

    Returns:
        A single-line string with the processed lines separated by ``" . "``.
    """
    return " . ".join(
        line.translate(_PUNCT_TABLE).strip()
        for line in text.lower().split("\n")
    )