Gladiator committed on
Commit
7cbe3c0
1 Parent(s): 890cbac

minimal cleaning trial

Browse files
Files changed (1) hide show
  1. utils.py +2 -2
utils.py CHANGED
@@ -22,12 +22,12 @@ def clean_text(x):
22
  x = re.sub(r"https*\S+", " ", x) # url
23
  x = re.sub(r"@\S+", " ", x) # mentions
24
  x = re.sub(r"#\S+", " ", x) # hastags
25
- x = x.replace("'", "") # remove ticks
26
  # x = re.sub("[%s]" % re.escape(string.punctuation), " ", x) # punctuation
27
  # x = re.sub(r"\w*\d+\w*", "", x) # numbers
28
  x = re.sub(r"\s{2,}", " ", x) # over spaces
29
  x = emoji_pattern.sub(r"", x) # emojis
30
- x = re.sub("[^A-Za-z0-9]+", " ", x) # special charachters
31
 
32
  return x
33
 
 
22
  x = re.sub(r"https*\S+", " ", x) # url
23
  x = re.sub(r"@\S+", " ", x) # mentions
24
  x = re.sub(r"#\S+", " ", x) # hastags
25
+ # x = x.replace("'", "") # remove ticks
26
  # x = re.sub("[%s]" % re.escape(string.punctuation), " ", x) # punctuation
27
  # x = re.sub(r"\w*\d+\w*", "", x) # numbers
28
  x = re.sub(r"\s{2,}", " ", x) # over spaces
29
  x = emoji_pattern.sub(r"", x) # emojis
30
+ # x = re.sub("[^A-Za-z0-9]+", " ", x) # special charachters
31
 
32
  return x
33