File size: 818 Bytes
0c969fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from parsinorm import General_normalization

# Text normalization helper built on parsinorm's General_normalization.
def normalize(txt):
  """Pass ``txt`` through a fixed chain of parsinorm corrections, then flatten whitespace.

  A new ``General_normalization`` object is created on every call. Each
  method named in ``step_names`` is invoked in order, with the output of
  one step used as the input of the next.

  Args:
    txt: Text to normalize (presumably a ``str`` -- the steps themselves
      are defined by parsinorm and are not visible here).

  Returns:
    The processed text with newlines, tabs and carriage returns turned
    into spaces, every run of whitespace collapsed to a single space,
    and no leading or trailing whitespace.
  """
  normalizer = General_normalization()

  # Order matters: every step consumes the previous step's output.
  step_names = (
    'alphabet_correction',
    'semi_space_correction',
    'english_correction',
    'html_correction',
    'arabic_correction',
    'punctuation_correction',
    'specials_chars',
    'remove_emojis',
    'number_correction',
    'remove_not_desired_chars',
    'remove_repeated_punctuation',
  )
  for step_name in step_names:
    # Look the method up only when its turn comes, like the direct calls did.
    txt = getattr(normalizer, step_name)(txt)

  # Turn line breaks and tabs into plain spaces before collapsing.
  for whitespace_char in ('\n', '\t', '\r'):
    txt = txt.replace(whitespace_char, ' ')

  # split() with no argument drops empty pieces, so joining on a single
  # space squeezes repeated whitespace and trims both ends.
  tokens = txt.split()
  return ' '.join(tokens)