Spaces:
Runtime error
Runtime error
File size: 4,582 Bytes
2fe8373 34cde4f 2fe8373 82306f4 2fe8373 4d06079 2fe8373 5d1afde 2fe8373 4d06079 2fe8373 34cde4f 2fe8373 34cde4f 2fe8373 34cde4f 2fe8373 34cde4f 3ffe88d 34cde4f 5d1afde 1b7e15d 5d1afde 34cde4f 1e15120 2fe8373 9c58193 3fffd5a 2fe8373 5459125 5d1afde 2fe8373 7c236b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from collections import Counter
from scipy.special import softmax
# ---------------------------------------------------------------------------
# Static UI text and label tables for the Gradio demo.
# ---------------------------------------------------------------------------

# HTML footer; passed as the `article` argument of gr.Interface below.
article_string = "Author: <a href=\"https://huggingface.co/ruanchaves\">Ruan Chaves Rodrigues</a>. Read more about our <a href=\"https://github.com/ruanchaves/eplm\">research on the evaluation of Portuguese language models</a>."
# Page title; passed as the `title` argument of gr.Interface below.
app_title = "Offensive Language Detection (Detecção de Linguagem Ofensiva)"
# Bilingual (English / Portuguese) intro text; passed as `description`.
app_description = """
This app detects offensive language on Portuguese text using multiple models. You can either introduce your own sentences by filling in "Text" or click on one of the examples provided below.
(Este aplicativo detecta linguagem ofensiva em texto em português usando vários modelos. Introduza suas próprias frases preenchendo o campo "Text", ou clique em um dos exemplos fornecidos abaixo.)
"""
# Clickable example inputs; each inner list holds a single text value.
# The first example is also used as the default value of the "Text" box.
app_examples = [
["Aquele cara é um babaca."],
["Quem não deve não teme!!"],
["Que nojo!🤮🤮🤮🤮🤮"],
["Vagabunda,Ordinária"],
["Vou mandar um óleo de peroba pra ela de presente! 😂😂😂😂"],
["Porque é corrupta é conivente com o desgoverno anterior"],
["A cada dia fico mais admirado com a cara de pau da elite dominante desse mundo até quando irão nos fazer de otários"]
]
# NOTE(review): the two *_component_description values below are not
# referenced anywhere else in the visible code — confirm whether they are
# still needed.
output_textbox_component_description = """
This box will display offensive language detection results based on the average score of multiple models.
(Esta caixa exibirá resultados da detecção de linguagem ofensiva com base na pontuação média de vários modelos.)
"""
output_json_component_description = { "breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
""",
"detalhamento": """
(Esta caixa apresenta um detalhamento da avaliação para cada modelo.)
""" }
# Class index -> short label; used as the keys of the score dict that
# predict() returns.
short_score_descriptions = {
0: "Not offensive",
1: "Offensive"
}
# Class index -> English sentence describing the predicted class.
score_descriptions = {
0: "This text is not offensive.",
1: "This text is offensive.",
}
# Class index -> Portuguese sentence describing the predicted class.
score_descriptions_pt = {
1: "(Este texto é ofensivo.)",
0: "(Este texto não é ofensivo.)",
}
# Hub model id -> display name shown in the "Model" dropdown.
# Insertion order matters: it fixes both the load order and the dropdown order.
user_friendly_name = dict(
    [
        ("ruanchaves/mdeberta-v3-base-hatebr", "mDeBERTa-v3 (HateBR)"),
        ("ruanchaves/bert-base-portuguese-cased-hatebr", "BERTimbau base (HateBR)"),
        ("ruanchaves/bert-large-portuguese-cased-hatebr", "BERTimbau large (HateBR)"),
    ]
)

# Hub model ids, in the same order as the mapping above.
model_list = [model_id for model_id in user_friendly_name]

# Display names in dropdown order, and the inverse lookup
# (display name -> hub model id).
user_friendly_name_list = [user_friendly_name[model_id] for model_id in model_list]
reverse_user_friendly_name = dict(zip(user_friendly_name_list, model_list))
# Load the tokenizer and classification model for every id in `model_list`
# at import time. Each entry of `model_array` is a dict with the keys
# "name", "tokenizer" and "model".
model_array = []
for model_name in model_list:
    row = dict(
        name=model_name,
        tokenizer=AutoTokenizer.from_pretrained(model_name),
        model=AutoModelForSequenceClassification.from_pretrained(model_name),
    )
    model_array.append(row)
def most_frequent(array):
    """Return the element that occurs most often in *array*.

    Ties are resolved the way ``Counter.most_common`` resolves them. An empty
    *array* raises ``IndexError``.
    """
    ranked = Counter(array).most_common(1)
    winner, _count = ranked[0]
    return winner
def predict(s1, chosen_model):
    """Classify a text as offensive or not offensive with one loaded model.

    Args:
        s1: Text to classify. ``None`` is treated as an empty string.
        chosen_model: Display name of the model to use (a key of
            ``reverse_user_friendly_name``). A falsy value selects the first
            entry of ``user_friendly_name_list``.

    Returns:
        A ``(scores, markdown_description)`` tuple. ``scores`` maps each label
        from ``short_score_descriptions`` to its softmax score.
        ``markdown_description`` is the English sentence for the
        highest-scoring class followed by the Portuguese one.

    Raises:
        KeyError: If ``chosen_model`` is not a known display name, or if the
            matching model is not present in ``model_array``.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]
    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    # Pick the pre-loaded tokenizer/model pair for the requested checkpoint.
    row = next(
        (entry for entry in model_array if entry["name"] == full_chosen_model_name),
        None,
    )
    if row is None:
        # Previously this case fell through and failed later with an
        # unrelated "logits referenced before assignment" error.
        raise KeyError(f"Model {full_chosen_model_name!r} has not been loaded.")
    tokenizer = row["tokenizer"]
    model = row["model"]

    text = "" if s1 is None else s1
    # truncation=True cuts over-long inputs down to the tokenizer's maximum
    # length, so they no longer risk exceeding what the model accepts.
    model_input = tokenizer(
        [text],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model(**model_input)
    # output[0] is taken as the logits tensor; the second [0] selects the only
    # item in the batch of one.
    logits = softmax(output[0][0].detach().numpy()).tolist()

    max_pos = logits.index(max(logits))
    markdown_description = (
        score_descriptions[max_pos] + "\n \n" + score_descriptions_pt[max_pos]
    )
    scores = {short_score_descriptions[k]: v for k, v in enumerate(logits)}
    return scores, markdown_description
# Input widgets, in the order of predict()'s parameters: the text to classify
# (pre-filled with the first example) and the model to use.
inputs = [
    gr.Textbox(label="Text", value=app_examples[0][0]),
    gr.Dropdown(
        label="Model",
        choices=user_friendly_name_list,
        value=user_friendly_name_list[0],
    ),
]

# Output widgets, in the order of predict()'s return tuple: the per-label
# scores and the bilingual description.
outputs = [
    gr.Label(label="Result"),
    gr.Markdown(),
]

# Build the interface and start serving it as soon as this module is executed.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
    article=article_string,
).launch()