File size: 600 Bytes
4ee3884 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
import math
import re
import unicodedata
def cleaning(text):
    """Normalize a raw text string into lowercase, punctuation-separated form.

    Processing steps for ``str`` input:

    1. Compose the text to Unicode NFC and lowercase it.
    2. Delete every character that is not a space, one of ``, . ? !``,
       an ASCII letter or digit, or a precomposed lowercase Vietnamese
       letter. Tabs and newlines are deleted too, not turned into spaces.
    3. Replace each run of sentence-ending marks (``.``, ``!``, ``?``)
       with a single period surrounded by spaces.
    4. Replace each run of commas with a single comma surrounded by spaces.
    5. Collapse runs of spaces into one space.

    Leading and trailing spaces are not stripped, so text ending in
    punctuation ends in ``" . "`` or ``" , "``.

    Args:
        text: The value to clean. Anything that is not a ``str`` (for
            example ``None`` or a float) is returned unchanged.

    Returns:
        The cleaned string, or ``text`` itself when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text

    # The whitelist below lists precomposed letters only. Without NFC
    # composition, decomposed input (base letter + combining marks) would
    # silently lose its diacritics.
    text = unicodedata.normalize('NFC', text).lower()
    text = re.sub(r'[^ ,.?!a-z0-9àáảãạăắằẳẵặâấầẩẫậèéẻẽẹêếềểễệđìíỉĩịòóỏõọôốồổỗộơớờởỡợùúủũụưứừửữựỳýỷỹỵ]', '', text)

    # Any mix of '.', '!' and '?' becomes one spaced period.
    text = re.sub(r'[.!?]+', ' . ', text)
    # Any run of commas becomes one spaced comma.
    text = re.sub(r',+', ' , ', text)
    # The padding above can create double spaces; squeeze them out.
    text = re.sub(r' {2,}', ' ', text)
    return text
|