minimal cleaning trial
Browse files
utils.py
CHANGED
@@ -22,12 +22,12 @@ def clean_text(x):
|
|
22 |
x = re.sub(r"https*\S+", " ", x) # url
|
23 |
x = re.sub(r"@\S+", " ", x) # mentions
|
24 |
x = re.sub(r"#\S+", " ", x) # hashtags
|
25 |
-
x = x.replace("'", "") # remove ticks
|
26 |
# x = re.sub("[%s]" % re.escape(string.punctuation), " ", x) # punctuation
|
27 |
# x = re.sub(r"\w*\d+\w*", "", x) # numbers
|
28 |
x = re.sub(r"\s{2,}", " ", x) # over spaces
|
29 |
x = emoji_pattern.sub(r"", x) # emojis
|
30 |
-
x = re.sub("[^A-Za-z0-9]+", " ", x) # special characters
|
31 |
|
32 |
return x
|
33 |
|
|
|
22 |
x = re.sub(r"https*\S+", " ", x) # url
|
23 |
x = re.sub(r"@\S+", " ", x) # mentions
|
24 |
x = re.sub(r"#\S+", " ", x) # hashtags
|
25 |
+
# x = x.replace("'", "") # remove ticks
|
26 |
# x = re.sub("[%s]" % re.escape(string.punctuation), " ", x) # punctuation
|
27 |
# x = re.sub(r"\w*\d+\w*", "", x) # numbers
|
28 |
x = re.sub(r"\s{2,}", " ", x) # over spaces
|
29 |
x = emoji_pattern.sub(r"", x) # emojis
|
30 |
+
# x = re.sub("[^A-Za-z0-9]+", " ", x) # special characters
|
31 |
|
32 |
return x
|
33 |
|