FredZhang7 committed on
Commit
71aeaca
1 Parent(s): 767bf1b

Added missing step

Browse files
Files changed (1) hide show
  1. preprocess.py +10 -7
preprocess.py CHANGED
@@ -1,20 +1,23 @@
1
  import re
2
 
3
- def clean_sentence(sentence):
4
- # Remove "!", "?", ".", "(", ")" from the sentence
5
- sentence = re.sub(r"[!.?()]", "", sentence)
 
 
 
6
 
7
  # Replace " , " with a single space
8
- sentence = re.sub(r" , ", " ", sentence)
9
 
10
  # Remove any leading or trailing commas
11
- sentence = re.sub(r"^,|,$", "", sentence)
12
 
13
  # Strip spaces
14
- sentence = sentence.strip()
15
 
16
  # Remove any usernames
17
- words = sentence.split(", ")
18
  result = []
19
  for word in words:
20
  word = word.strip()
 
1
  import re
2
 
3
+ def clean_tags(tags):
4
+ # Make tags more human readable
5
+ tags = tags.replace(' ', ', ').replace('_', ' ')
6
+
7
+ # Remove "!", "?", ".", "(", ")" from the tags
8
+ tags = re.sub(r"[!.?()]", "", tags)
9
 
10
  # Replace " , " with a single space
11
+ tags = re.sub(r" , ", " ", tags)
12
 
13
  # Remove any leading or trailing commas
14
+ tags = re.sub(r"^,|,$", "", tags)
15
 
16
  # Strip spaces
17
+ tags = tags.strip()
18
 
19
  # Remove any usernames
20
+ words = tags.split(", ")
21
  result = []
22
  for word in words:
23
  word = word.strip()