"""Gradio demo: decide whether an answer suits a question (Portuguese models).

Several sequence-classification checkpoints are loaded once at import time.
Each one labels the (question, answer) pair as 0 or 1, and the label returned
by most models is reported together with a per-model breakdown.
"""

from collections import Counter

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# ---------------------------------------------------------------------------
# User-facing text
# NOTE(review): whitespace inside these literals is reproduced exactly as it
# appears in the reviewed source.
# ---------------------------------------------------------------------------
article_string = "Author: Ruan Chaves Rodrigues. Read more about our research on the evaluation of Portuguese language models."

app_title = "Question Answering (Respostas a Perguntas)"

app_description = (
    " This app determines if an answer is appropriate for a question. "
    "You can either introduce your own sentences by filling in "
    '"Question" and "Answer" or click on one of the example pairs '
    "provided below. "
    "(Este aplicativo determina se uma resposta é apropriada para uma "
    "pergunta. Você pode introduzir suas próprias frases preenchendo "
    '"Question" e "Answer" ou clicar em um dos exemplos de pares '
    "fornecidos abaixo.) "
)

# Each example is a [question, answer] pair shown as a clickable preset.
app_examples = [
    [
        "Qual a montanha mais alta do mundo?",
        "Monte Everest é a montanha mais alta do mundo.",
    ],
    [
        "Quais as duas línguas mais faladas no mundo?",
        "Leonardo da Vinci pintou a Mona Lisa.",
    ],
    [
        "Qual a personagem mais famosa de Maurício de Sousa?",
        "A personagem mais famosa de Mauricio de Sousa é a Mônica.",
    ],
]

# Placeholder shown in the "Evaluation" textbox before the first prediction.
output_textbox_component_description = (
    " Output will appear here once the app has finished analyzing the "
    "answer. "
    "(A saída aparecerá aqui assim que o aplicativo terminar de analisar a "
    "resposta.) "
)

# Placeholder shown in the "Results by model" JSON box before the first
# prediction.
output_json_component_description = {
    "breakdown": (
        " This box presents a detailed breakdown of the evaluation for each "
        "model. "
    ),
    "detalhamento": (
        " (Esta caixa apresenta um detalhamento da avaliação para cada "
        "modelo.) \n"
    ),
}

# Human-readable explanation of each predicted class index (English).
score_descriptions = {
    0: "Negative: The answer is not suitable for the provided question.",
    1: "Positive: The answer is suitable for the provided question.",
}

# Human-readable explanation of each predicted class index (Portuguese).
score_descriptions_pt = {
    0: "(Negativo: A resposta não é adequada para a pergunta fornecida.)",
    1: "(Positivo: A resposta é adequada para a pergunta fornecida.)",
}

# ---------------------------------------------------------------------------
# Models
# ---------------------------------------------------------------------------
model_list = [
    "ruanchaves/mdeberta-v3-base-faquad-nli",
    "ruanchaves/bert-base-portuguese-cased-faquad-nli",
    "ruanchaves/bert-large-portuguese-cased-faquad-nli",
]

# Checkpoint identifier -> label displayed in the per-model breakdown.
user_friendly_name = {
    "ruanchaves/mdeberta-v3-base-faquad-nli": "mDeBERTa-v3 (FaQuAD)",
    "ruanchaves/bert-base-portuguese-cased-faquad-nli": "BERTimbau base (FaQuAD)",
    "ruanchaves/bert-large-portuguese-cased-faquad-nli": "BERTimbau large (FaQuAD)",
}

# Load every tokenizer/model pair once at import time so that requests only
# pay for inference.
model_array = []
for model_name in model_list:
    row = {
        "name": model_name,
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
    }
    model_array.append(row)


def most_frequent(array):
    """Return the element that occurs most often in *array*.

    Ties are resolved by ``Counter.most_common``. An empty *array* raises
    ``IndexError`` because there is no first entry to return.
    """
    occurrence_count = Counter(array)
    return occurrence_count.most_common(1)[0][0]


def predict(s1, s2):
    """Classify a (question, answer) pair with every loaded model.

    Args:
        s1: The question text.
        s2: The candidate answer text.

    Returns:
        A ``(final_description, per_model)`` tuple. ``final_description`` is
        the English and Portuguese description of the label predicted by most
        models, separated by ``"\\n \\n"``. ``per_model`` maps each model's
        display name to the English description of that model's own label.
    """
    votes = {}
    for row in model_array:
        display_name = user_friendly_name[row["name"]]
        # Encode the two texts as a single sentence pair (a batch of one).
        model_input = row["tokenizer"](
            [s1], [s2], padding=True, return_tensors="pt"
        )
        with torch.no_grad():  # inference only; no gradients required
            output = row["model"](**model_input)
        # output[0] holds the classification scores for the batch; the second
        # [0] selects the single pair, and argmax gives its class index.
        votes[display_name] = output[0][0].argmax().item()

    # The overall verdict is a majority vote across the models.
    majority_label = most_frequent(list(votes.values()))
    final_description = (
        score_descriptions[majority_label]
        + "\n \n"
        + score_descriptions_pt[majority_label]
    )

    # Build a fresh mapping rather than rewriting `votes` while iterating it.
    per_model = {
        name: score_descriptions[label] for name, label in votes.items()
    }
    return final_description, per_model


# Use the top-level component classes for inputs as well as outputs; the
# legacy `gr.inputs.*` namespace is deprecated and absent from newer Gradio.
inputs = [
    gr.Textbox(label="Question"),
    gr.Textbox(label="Answer"),
]

outputs = [
    gr.Textbox(label="Evaluation", value=output_textbox_component_description),
    gr.JSON(label="Results by model", value=output_json_component_description),
]
# Build the interface from the module-level configuration, then start serving.
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
    article=article_string,
)
demo.launch()