mginoben's picture
Fix error handling issues
48392ea
raw
history blame
No virus
4.85 kB
import gradio as gr
import requests
import emoji
import re
# Hosted inference endpoint that `query` posts the preprocessed text to.
API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"
# Authorization header attached to every request made by `query`.
# NOTE(review): this bearer token is hard-coded in source. It looks like a
# live credential - consider revoking it and loading the replacement from
# configuration (e.g. an environment variable) instead of committing it.
headers = {"Authorization": "Bearer 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"}
# Words that `predict` replaces with asterisks in the displayed text when the
# model labels the input as 'Abusive'. Matching is done case-insensitively on
# substrings, in the order listed here.
profanities = [
    'bobo', 'bobong',
    'bwiset', 'bwisit', 'buwisit', 'buwiset', 'bwesit',
    'gago', 'gagong',
    'kupal',
    'pakshet', 'pakyu',
    'pucha', 'puchang',
    'punyeta', 'punyetang',
    'puta', 'putang', 'putangina', 'putanginang',
    'tanga', 'tangang',
    'tangina', 'tanginang',
    'tarantado', 'tarantadong',
    'ulol',
]
# Shortened / informal spellings mapped to the expanded form that `preprocess`
# substitutes for them. `preprocess` applies the entries one after another, in
# insertion order, each as a whole-word regex replacement - so the order of
# the entries below is significant and must be preserved.
contractions = {
    # Common shortened words
    'di': 'hindi',
    'to': 'ito',
    'no': 'ano',
    'kundi': 'kung hindi',
    'nya': 'niya',
    'nyo': 'ninyo',
    'niyo': 'ninyo',
    'pano': 'paano',
    'sainyo': 'sa inyo',
    'sayo': 'sa iyo',
    'pag': 'kapag',
    'kesa': 'kaysa',
    'dun': 'doon',
    'ganto': 'ganito',
    'nandun': 'nandoon',
    'saka': 'tsaka',
    'ung': 'yung',
    'wag': 'huwag',
    'sya': 'siya',
    'bat': 'bakit',
    'yon': 'iyon',
    'yun': 'iyon',
    'dyan': 'diyan',
    'jan': 'diyan',
    'andito': 'nandito',
    # Fused profanity + pronoun
    'tanginamo': 'tangina mo',
    'putanginamo': 'putangina mo',
    'san': 'saan',
    'ganun': 'ganoon',
    # Profanities carrying the "-ng" linker
    'gagong': 'gago na',
    'bobong': 'bobo na',
    'tangang': 'tanga na',
    'kelan': 'kailan',
    'raw': 'daw',
    'tanginang': 'tangina na',
    'tarantadong': 'tarantado na',
    # Spelling variants folded into a single form
    'putang ina': 'putangina',
    'putang inang': 'putangina',
    'putanginang': 'putangina',
    'itong': 'ito ang',
    'lng': 'lang',
    'bwisit': 'bwiset',
    'bwesit': 'bwiset',
    'buwisit': 'bwiset',
    'buwesit': 'bwiset',
}
def preprocess(row):
    """Normalise one raw input string before it is sent to the classifier.

    Steps, in order: lowercase, strip emojis, collapse any run of three or
    more identical characters to a single one, clean the text token by token,
    then (for multi-word results only) drop every character that is not an
    ASCII letter or a space and expand the entries of ``contractions``.

    If exactly one token survives the per-token cleaning it is returned as-is,
    i.e. without the character filter or the contraction expansion.
    """
    laugh_markers = ['hahaha', 'wahaha', 'hahaa', 'ahha', 'haaha', 'hahah', 'ahah', 'hha']
    tag_markers = ['@', '#']

    def _clean_token(token):
        # The checks are ordered by precedence: a token containing a digit is
        # always dropped, a laugh always becomes 'haha' otherwise, and only
        # then are links and @mentions / #hashtags dropped.
        if any(ch.isdigit() for ch in token):
            return ''
        if any(marker in token for marker in laugh_markers):
            return 'haha'
        if 'http' in token or any(marker in token for marker in tag_markers):
            return ''
        return token

    text = row.lower()
    text = emoji.replace_emoji(text, replace='')
    # 'grabeee' -> 'grabe'; doubled letters are left untouched.
    text = re.sub(r'(.)\1{2,}', r'\1', text)

    kept_tokens = [tok for tok in map(_clean_token, text.split()) if tok]
    sentence = ' '.join(kept_tokens)

    # A lone surviving word is handed back without any further processing.
    if len(kept_tokens) == 1:
        return sentence

    # Keep ASCII letters and spaces only.
    sentence = re.sub(r"[^A-Za-z ]+", '', sentence)

    # Whole-word expansion, applied in the dictionary's insertion order.
    for short_form, expanded in contractions.items():
        sentence = re.sub(rf"\b{short_form}\b", expanded, sentence)

    return sentence
def query(payload, timeout=30):
    """POST *payload* to the hosted model and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable value sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The parsed JSON body of the response. If the request fails (connection
        error, timeout) or the body is not valid JSON, a dict of the form
        ``{'error': <message>}`` is returned instead of raising, which is the
        shape `predict` already checks for.
    """
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older `requests`
        # versions, where the decode error is not a RequestException.
        return {'error': str(exc)}
def _mask_profanities(text, words):
    """Return *text* with every case-insensitive occurrence of *words* starred out."""
    if not words:
        return text
    # Regex alternation picks the first alternative that matches, not the
    # longest one, so the longest words must come first. Otherwise 'puta'
    # would fire inside 'putangina' and leave '****ngina' behind.
    longest_first = sorted(words, key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(word) for word in longest_first), re.IGNORECASE)
    # Every matched character becomes '*', except literal spaces.
    return pattern.sub(lambda match: re.sub(r'[^ ]', '*', match.group()), text)


def predict(text):
    """Classify *text* and return ``(scores, display_text)`` for the UI.

    The text is preprocessed once, sent to the model through `query`, and the
    preprocessed form is printed for logging.

    Returns:
        A 2-tuple. On an API error: ``(error message, 'Error occured. Try
        again later.')``. Otherwise: a dict built from the first element of
        the reply, and the original text - with profanities masked by
        asterisks when the first label in that dict is ``'Abusive'``.
    """
    cleaned = preprocess(text)
    output = query(cleaned)
    print(cleaned)

    if 'error' in output:
        return output['error'], 'Error occured. Try again later.'

    # Each entry of output[0] is turned into a (key, value) pair from its
    # values - presumably (label, score), most probable first; TODO confirm
    # against the inference API's response format.
    scores = dict(tuple(entry.values()) for entry in output[0])
    predicted_label = next(iter(scores), None)

    if predicted_label == 'Abusive':
        return scores, _mask_profanities(text, profanities)
    return scores, text
# Saves user feedback flagged in the UI to a Hugging Face dataset.
# NOTE(review): the access token is hard-coded in source. It looks like a live
# credential - consider revoking it and loading the replacement from
# configuration instead of committing it.
hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')

# Web UI: one text input, the label scores and the (possibly masked) text as
# outputs, a few clickable examples, and manual "Good bot" / "Bad bot" flagging.
demo = gr.Interface(
    fn=predict,
    inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
    outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION"),
             gr.components.Text(label='OUTPUT')],
    examples=['Tangina mo naman sobrang yabang mo gago!!😠😀 @davidrafael',
              'Napakainit ngayong araw pakshet namaaan!!',
              'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
              # NOTE(review): the trailing symbol below may be a mis-encoded
              # emoji as well - confirm what was intended.
              'Bobo ka ba? napakadali lang nyan eh... 🀑',
              # The emojis here were stored as mojibake (UTF-8 bytes decoded
              # with a single-byte code page); restored to the real characters.
              'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂'],
    allow_flagging="manual",
    flagging_callback=hf_writer,
    flagging_options=['Good bot', 'Bad bot']
)

demo.launch()