Gladiator committed on
Commit
6496ba4
1 Parent(s): 7cbe3c0

update clean text code

Browse files
Files changed (1) hide show
  1. utils.py +1 -1
utils.py CHANGED
@@ -27,7 +27,7 @@ def clean_text(x):
27
  # x = re.sub(r"\w*\d+\w*", "", x) # numbers
28
  x = re.sub(r"\s{2,}", " ", x) # over spaces
29
  x = emoji_pattern.sub(r"", x) # emojis
30
- # x = re.sub("[^A-Za-z0-9]+", " ", x) # special characters
31
 
32
  return x
33
 
 
27
  # x = re.sub(r"\w*\d+\w*", "", x) # numbers
28
  x = re.sub(r"\s{2,}", " ", x) # over spaces
29
  x = emoji_pattern.sub(r"", x) # emojis
30
+ x = re.sub("[^.,!?A-Za-z0-9]+", " ", x) # special characters except .,!?
31
 
32
  return x
33