mginoben committed on
Commit
33125f0
1 Parent(s): 40a4fcd

Fixed profanities in English lookup words

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -40,6 +40,7 @@ obj_pronouns = read_text('obj_pronouns')
40
  profanities = read_text('profanities', 'json')
41
  eng_words = set(words.words())
42
 
 
43
 
44
  def fuzzy_lookup(tweet):
45
 
@@ -47,7 +48,7 @@ def fuzzy_lookup(tweet):
47
  lookup_profanity = np.concatenate([np.hstack(list(profanities.values())), list(profanities.keys())])
48
 
49
  for word in tweet.split():
50
- if word in eng_words:
51
  break
52
  scores = []
53
  matched_words = []
@@ -66,6 +67,7 @@ def fuzzy_lookup(tweet):
66
  if matched_words[max_score_index] in lookup_profanity:
67
  matched_profanity[word] = matched_words[max_score_index]
68
 
 
69
  for word, profanity in matched_profanity.items():
70
  word_split = word.split(profanity[-2:])
71
  for pronoun in obj_pronouns:
 
40
  profanities = read_text('profanities', 'json')
41
  eng_words = set(words.words())
42
 
43
+ # TODO: check English words that are also Tagalog profanities
44
 
45
  def fuzzy_lookup(tweet):
46
 
 
48
  lookup_profanity = np.concatenate([np.hstack(list(profanities.values())), list(profanities.keys())])
49
 
50
  for word in tweet.split():
51
+ if word in list(set(eng_words) - set(lookup_profanity)):
52
  break
53
  scores = []
54
  matched_words = []
 
67
  if matched_words[max_score_index] in lookup_profanity:
68
  matched_profanity[word] = matched_words[max_score_index]
69
 
70
+ # Expand Pronouns in Profanities
71
  for word, profanity in matched_profanity.items():
72
  word_split = word.split(profanity[-2:])
73
  for pronoun in obj_pronouns: