mginoben committed on
Commit
16316d5
1 Parent(s): fe9ff70
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -37,8 +37,8 @@ def fuzzy_lookup(tweet):
37
 
38
  # Loop each word in tweet
39
  for word in tweet.split():
40
- # Only get digits and letters
41
- word = re.sub("[^a-zA-Z0-9@]", "", word)
42
  scores = []
43
  matched_words = []
44
  # If word > 4 chars
@@ -132,11 +132,11 @@ def predict(tweet):
132
  fuzzy_text, matches = fuzzy_lookup(tweet)
133
  processed_text = preprocess(fuzzy_text)
134
  output = query(processed_text)
135
- match_profanities = set(processed_text.split()) & set(list(profanities.keys()))
136
 
137
  if 'error' in output:
138
  return output['error'], 'Error occured. Try again later.', {}
139
- elif len(match_profanities) == 0:
140
  return 'No Profanity Found.', '', {}
141
  else:
142
  output = [tuple(i.values()) for i in output[0]]
@@ -146,12 +146,15 @@ def predict(tweet):
146
  if predicted_label == 'Abusive':
147
  # Censor
148
  for base_word, _ in matches.items():
149
- tweet = tweet.replace(base_word, re.sub("[a-zA-Z0-9@]", "*", base_word))
 
 
 
150
  return output, tweet, json.dumps(matches)
151
  else:
152
  return output, tweet, json.dumps(matches)
153
 
154
- # output, tweet, matches = predict('Sama ng ugali mo pre')
155
  # print(output, '\n', tweet, '\n', matches)
156
 
157
  hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
 
37
 
38
  # Loop each word in tweet
39
  for word in tweet.split():
40
+ # Keep only letters, digits and '@', then lowercase
41
+ word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
42
  scores = []
43
  matched_words = []
44
  # If word > 4 chars
 
132
  fuzzy_text, matches = fuzzy_lookup(tweet)
133
  processed_text = preprocess(fuzzy_text)
134
  output = query(processed_text)
135
+
136
 
137
  if 'error' in output:
138
  return output['error'], 'Error occured. Try again later.', {}
139
+ elif len(matches) == 0:
140
  return 'No Profanity Found.', '', {}
141
  else:
142
  output = [tuple(i.values()) for i in output[0]]
 
146
  if predicted_label == 'Abusive':
147
  # Censor
148
  for base_word, _ in matches.items():
149
+ mask = '*' * len(base_word)
150
+ compiled = re.compile(re.escape(base_word), re.IGNORECASE)
151
+ tweet = compiled.sub(mask, tweet)
152
+ # tweet = tweet.replace(base_word, re.sub("[a-zA-Z0-9@]", "*", base_word))
153
  return output, tweet, json.dumps(matches)
154
  else:
155
  return output, tweet, json.dumps(matches)
156
 
157
+ # output, tweet, matches = predict('ul0L Sama ng ugali mo pre Tangina uL0l!!!')
158
  # print(output, '\n', tweet, '\n', matches)
159
 
160
  hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')