mginoben committed on
Commit
114694a
1 Parent(s): 1802e7e

Remove trailing punctuation

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -6,7 +6,7 @@ import json
6
  from thefuzz import process, fuzz
7
  import numpy as np
8
  import re
9
- import string
10
 
11
 
12
  API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"
@@ -39,7 +39,8 @@ def fuzzy_lookup(tweet):
39
  # Loop each word in tweet
40
  for word in tweet.split():
41
  # Remove punctuation
42
- base_word = word.translate(str.maketrans('', '', string.punctuation))
 
43
  # Keep only letters, digits and '@', then lowercase
44
  processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
45
  scores = []
@@ -55,15 +56,15 @@ def fuzzy_lookup(tweet):
55
  if len(scores) > 0:
56
  max_score_index = np.argmax(scores)
57
  if matched_words[max_score_index] in lookup_profanity:
58
- matches[base_word] = matched_words[max_score_index]
59
 
60
 
61
- for base_word, matched_profanity in matches.items():
62
- word_split = base_word.split(matched_profanity[-2:])
63
  for pronoun in obj_pronouns:
64
  if len(word_split) > 1:
65
  if pronoun == word_split[-1]:
66
- matches[base_word] = matched_profanity + ' ' + pronoun
67
  break
68
 
69
  # Replace each profanity with its fuzzy lookup result
 
6
  from thefuzz import process, fuzz
7
  import numpy as np
8
  import re
9
+ from string import punctuation
10
 
11
 
12
  API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"
 
39
  # Loop each word in tweet
40
  for word in tweet.split():
41
  # Remove punctuation
42
+ word = word.strip(punctuation)
43
+
44
  # Keep only letters, digits and '@', then lowercase
45
  processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
46
  scores = []
 
56
  if len(scores) > 0:
57
  max_score_index = np.argmax(scores)
58
  if matched_words[max_score_index] in lookup_profanity:
59
+ matches[word] = matched_words[max_score_index]
60
 
61
 
62
+ for word, matched_profanity in matches.items():
63
+ word_split = word.split(matched_profanity[-2:])
64
  for pronoun in obj_pronouns:
65
  if len(word_split) > 1:
66
  if pronoun == word_split[-1]:
67
+ matches[word] = matched_profanity + ' ' + pronoun
68
  break
69
 
70
  # Replace each profanity with its fuzzy lookup result