mginoben commited on
Commit
91caef4
•
1 Parent(s): 7a70c71

Fixed bugs

Browse files
Files changed (1) hide show
  1. app.py +30 -26
app.py CHANGED
@@ -12,6 +12,11 @@ from string import punctuation
12
  API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
13
  headers = {"Authorization": "Bearer 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"}
14
 
 
 
 
 
 
15
  def read_text(filename, filetype='txt'):
16
  words = []
17
 
@@ -31,11 +36,6 @@ lookup_words = read_text('lookup_words')
31
  obj_pronouns = read_text('obj_pronouns')
32
  profanities = read_text('profanities', 'json')
33
 
34
- def query(text):
35
- text = {"inputs": text}
36
- response = requests.post(API_URL, headers=headers, json=text)
37
- return response.json()
38
-
39
 
40
  # for profanity in profanities:
41
  # print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
@@ -77,7 +77,7 @@ def fuzzy_lookup(tweet):
77
  for pronoun in obj_pronouns:
78
  if len(word_split) > 1:
79
  if pronoun == word_split[-1]:
80
- matched_profanity[word] = matched_profanity + ' ' + pronoun
81
  break
82
 
83
  # Replace each profanities by fuzzy lookup result
@@ -120,28 +120,30 @@ def preprocess(tweet):
120
  # Combine list of words back to sentence
121
  preprocessed_tweet = ' '.join(filter(None, row_split))
122
 
 
 
 
123
  # Check if output contains single word then return null
124
  if len(preprocessed_tweet.split()) == 1:
125
- return preprocessed_tweet
126
 
127
  # Expand Contractions
128
  for i in contractions.items():
129
  preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
130
 
131
- # Fuzzy Lookup
132
- preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
133
-
134
  return preprocessed_tweet, matches
135
 
136
 
137
  def predict(tweet):
138
-
139
  preprocessed_tweet, matched_profanity = preprocess(tweet)
140
 
141
  prediction = query(preprocessed_tweet)
142
 
143
  if type(prediction) is dict:
144
- return "Model is still loading. Try again."
 
 
145
 
146
  if bool(matched_profanity) == False:
147
  return "No profanity found."
@@ -149,9 +151,9 @@ def predict(tweet):
149
  prediction = [tuple(i.values()) for i in prediction[0]]
150
  prediction = dict((x, y) for x, y in prediction)
151
 
152
- print("\n", tweet)
153
- print(matched_profanity)
154
- print(prediction, "\n")
155
 
156
  return prediction
157
 
@@ -189,18 +191,20 @@ def predict(tweet):
189
  # # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
190
 
191
 
192
- demo = gr.Interface(
193
- fn=predict,
 
 
194
 
195
- inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
196
 
197
- outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION")],
 
 
 
 
 
198
 
199
 - examples=['Tangina mo naman sobrang yabang mo gago!!😠😤 @davidrafael',
200
- 'Napakainit ngayong araw pakshet namaaan!!',
201
- 'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
202
 - 'Bobo ka ba? napakadali lang nyan eh... 🤡',
203
 - 'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂'],
204
- )
205
 
206
- demo.launch()
 
12
  API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
13
  headers = {"Authorization": "Bearer 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"}
14
 
15
+ def query(text):
16
+ text = {"inputs": text}
17
+ response = requests.post(API_URL, headers=headers, json=text)
18
+ return response.json()
19
+
20
  def read_text(filename, filetype='txt'):
21
  words = []
22
 
 
36
  obj_pronouns = read_text('obj_pronouns')
37
  profanities = read_text('profanities', 'json')
38
 
 
 
 
 
 
39
 
40
  # for profanity in profanities:
41
  # print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
 
77
  for pronoun in obj_pronouns:
78
  if len(word_split) > 1:
79
  if pronoun == word_split[-1]:
80
+ matched_profanity[word] = profanity + ' ' + pronoun
81
  break
82
 
83
  # Replace each profanities by fuzzy lookup result
 
120
  # Combine list of words back to sentence
121
  preprocessed_tweet = ' '.join(filter(None, row_split))
122
 
123
+ # Fuzzy Lookup
124
+ preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
125
+
126
  # Check if output contains single word then return null
127
  if len(preprocessed_tweet.split()) == 1:
128
+ return preprocessed_tweet, matches
129
 
130
  # Expand Contractions
131
  for i in contractions.items():
132
  preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
133
 
 
 
 
134
  return preprocessed_tweet, matches
135
 
136
 
137
  def predict(tweet):
138
+
139
  preprocessed_tweet, matched_profanity = preprocess(tweet)
140
 
141
  prediction = query(preprocessed_tweet)
142
 
143
  if type(prediction) is dict:
144
+ # return "Model is still loading. Try again."
145
+ print("loading")
146
+ predict(tweet)
147
 
148
  if bool(matched_profanity) == False:
149
  return "No profanity found."
 
151
  prediction = [tuple(i.values()) for i in prediction[0]]
152
  prediction = dict((x, y) for x, y in prediction)
153
 
154
+ print("\nTWEET:", tweet)
155
+ print("DETECTED PROFANITY:", matched_profanity)
156
+ print("LABELS:", prediction, "\n")
157
 
158
  return prediction
159
 
 
191
  # # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
192
 
193
 
194
+ # demo = gr.Interface(
195
+ # fn=predict,
196
+
197
+ # inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
198
 
199
+ # outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION")],
200
 
201
 + # examples=['Tangina mo naman sobrang yabang mo gago!!😠😤 @davidrafael',
202
+ # 'Napakainit ngayong araw pakshet namaaan!!',
203
+ # 'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
204
 + # 'Bobo ka ba? napakadali lang nyan eh... 🤡',
205
 + # 'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂'],
206
+ # )
207
 
208
+ # demo.launch()
 
 
 
 
 
209
 
210
+ predict("asdasd kgjhgjhgj")