mginoben committed on
Commit
93baba5
1 Parent(s): 1c79acc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -37,7 +37,9 @@ def fuzzy_lookup(tweet):
37
 
38
  # Loop each word in tweet
39
  for word in tweet.split():
40
- # Only get digits and letters then lowercase
 
 
41
  processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
42
  scores = []
43
  matched_words = []
@@ -52,15 +54,15 @@ def fuzzy_lookup(tweet):
52
  if len(scores) > 0:
53
  max_score_index = np.argmax(scores)
54
  if matched_words[max_score_index] in lookup_profanity:
55
- matches[word] = matched_words[max_score_index]
56
 
57
 
58
- for word, matched_profanity in matches.items():
59
- word_split = word.split(matched_profanity[-2:])
60
  for pronoun in obj_pronouns:
61
  if len(word_split) > 1:
62
  if pronoun == word_split[-1]:
63
- matches[word] = matched_profanity + ' ' + pronoun
64
  break
65
 
66
  # Replace each profanities by fuzzy lookup result
 
37
 
38
  # Loop each word in tweet
39
  for word in tweet.split():
40
+ # Remove punctuations
41
+ base_word = word.translate(str.maketrans('', '', string.punctuation))
42
+ # Only get digits and letters then lowercase
43
  processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
44
  scores = []
45
  matched_words = []
 
54
  if len(scores) > 0:
55
  max_score_index = np.argmax(scores)
56
  if matched_words[max_score_index] in lookup_profanity:
57
+ matches[base_word] = matched_words[max_score_index]
58
 
59
 
60
+ for base_word, matched_profanity in matches.items():
61
+ word_split = base_word.split(matched_profanity[-2:])
62
  for pronoun in obj_pronouns:
63
  if len(word_split) > 1:
64
  if pronoun == word_split[-1]:
65
+ matches[base_word] = matched_profanity + ' ' + pronoun
66
  break
67
 
68
  # Replace each profanities by fuzzy lookup result