Spaces:

mginoben
/

tagalog-profanity-classification

Sleeping

mginoben committed on Apr 4, 2023

Commit

fe9ff70

•

1 Parent(s): 6912dca

Added handling for input with no profanity

Files changed (1) hide show

app.py CHANGED Viewed

@@ -72,7 +72,6 @@ def fuzzy_lookup(tweet):
             for prof_variant in prof_varations:
                 tweet = tweet.replace(prof_variant, profanity)
-    print('Fuzzy Returns:', tweet)
     return tweet, matches
@@ -131,24 +130,30 @@ def query(payload):
 def predict(tweet):
     fuzzy_text, matches = fuzzy_lookup(tweet)
-    output = query(preprocess(fuzzy_text))
     if 'error' in output:
-        return output['error'], 'Error occured. Try again later.', {"error": "error"}
     else:
         output = [tuple(i.values()) for i in output[0]]
         output = dict((x, y) for x, y in output)
         predicted_label = list(output.keys())[0]
         if predicted_label == 'Abusive':
             for base_word, _ in matches.items():
                 tweet = tweet.replace(base_word, re.sub("[a-zA-Z0-9@]", "*", base_word))
             return output, tweet, json.dumps(matches)
         else:
             return output, tweet, json.dumps(matches)
 hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')

             for prof_variant in prof_varations:
                 tweet = tweet.replace(prof_variant, profanity)
     return tweet, matches
 def predict(tweet):
     """Classify ``tweet`` and censor its matched profanities when it is abusive.

     Returns a 3-tuple ``(result, text, matches)``:

     * no known profanity in the preprocessed text:
       ``('No Profanity Found.', '', {})``
     * the model query reports an error:
       ``(<error message>, 'Error occured. Try again later.', {})``
     * otherwise: ``(<label -> score dict>, <tweet>, <matches as JSON string>)``,
       where the matched words in ``tweet`` are masked with ``*`` only when
       the first label returned by the model is ``'Abusive'``.
     """
     fuzzy_text, matches = fuzzy_lookup(tweet)
     processed_text = preprocess(fuzzy_text)

     # Check the word list before calling the model: the model's answer is
     # discarded when the text contains no known profanity, so querying it in
     # that case would only cost a round trip (and could surface an unrelated
     # model error for perfectly clean input).
     match_profanities = set(processed_text.split()) & set(profanities.keys())
     if not match_profanities:
         return 'No Profanity Found.', '', {}

     output = query(processed_text)
     if 'error' in output:
         return output['error'], 'Error occured. Try again later.', {}

     # output[0] holds one dict per label; the values of each dict are used as
     # a (key, value) pair -- presumably (label, score); verify against query().
     scores = dict(tuple(entry.values()) for entry in output[0])

     # The first entry is treated as the predicted label (assumes the model
     # lists labels best-first -- TODO confirm). An empty result yields None
     # instead of raising, so the text is simply returned uncensored.
     predicted_label = next(iter(scores), None)
     if predicted_label == 'Abusive':
         # Censor: mask every letter, digit and '@' of each matched word.
         for base_word in matches:
             tweet = tweet.replace(base_word, re.sub("[a-zA-Z0-9@]", "*", base_word))

     return scores, tweet, json.dumps(matches)
+# output, tweet, matches = predict('Sama ng ugali mo pre')
+# print(output, '\n', tweet, '\n', matches)
 # NOTE(review): the first argument looks like a Hugging Face access token
 # hard-coded in source. Presumably it should be revoked and supplied through
 # a secret / environment variable instead -- confirm with the Space owner.
 # The second argument is the name passed for the feedback dataset.
 hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')