mrmft's picture
adding project files to space
0c969fd
raw
history blame
818 Bytes
from parsinorm import General_normalization
#normalize
def normalize(txt):
    """Normalize *txt* with parsinorm and collapse its whitespace.

    The text is passed through a fixed sequence of
    ``General_normalization`` methods, each receiving the output of the
    previous one. Afterwards every run of whitespace (including newlines,
    tabs and carriage returns) is reduced to a single space and leading /
    trailing whitespace is removed.

    Args:
        txt: The text to normalize.

    Returns:
        The normalized text as a single line with single-space separators.
    """
    normalizer = General_normalization()

    # Order matters: each step consumes the result of the one before it.
    pipeline = (
        normalizer.alphabet_correction,
        normalizer.semi_space_correction,
        normalizer.english_correction,
        normalizer.html_correction,
        normalizer.arabic_correction,
        normalizer.punctuation_correction,
        normalizer.specials_chars,
        normalizer.remove_emojis,
        normalizer.number_correction,
        normalizer.remove_not_desired_chars,
        normalizer.remove_repeated_punctuation,
    )
    for step in pipeline:
        txt = step(txt)

    # Argument-less split() breaks on any whitespace run ('\n', '\t' and
    # '\r' included), so re-joining on ' ' flattens the text to one line.
    return ' '.join(txt.split())