ruanchaves committed
Commit 69f0559
1 Parent(s): c69e84b
Files changed (2):
  1. app.py +37 -30
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-
+from scipy.special import softmax
 from collections import Counter
 
 def most_frequent(array):
@@ -42,14 +42,14 @@ exibindo a relação entre os pares textuais.)
 score_descriptions = {
     1: "There is an entailment relation between premise and hypothesis. If the premise is true, then the hypothesis must also be true.",
     0: "There is no logical relation between the premise and the hypothesis.",
-    2: "Additionally, the premise has also been detected as being a paraphrase of the hypothesis."
+    2: "The premise is a paraphrase of the hypothesis."
 }
 
 
 score_descriptions_pt = {
     1: "(Existe uma relação de implicação entre premissa e hipótese. Se a premissa é verdadeira, então a hipótese também deve ser verdadeira.)",
     0: "(Não há relação lógica entre a premissa e a hipótese.)",
-    2: "(Além disso, a premissa também foi detectada como sendo uma paráfrase da hipótese.)"
+    2: "(A premissa é uma paráfrase da hipótese.)"
 }
 
 score_short_keys = {
@@ -77,6 +77,11 @@ user_friendly_name = {
     "ruanchaves/bert-large-portuguese-cased-assin-entailment": "BERTimbau large (ASSIN)"
 }
 
+reverse_user_friendly_name = { v:k for k,v in user_friendly_name.items() }
+
+user_friendly_name_list = list(user_friendly_name.values())
+
+
 model_array = []
 
 for model_name in model_list:
@@ -87,45 +92,47 @@ for model_name in model_list:
     model_array.append(row)
 
 
-def entailment(s1, s2):
+def predict(s1, s2, chosen_model):
+    if not chosen_model:
+        chosen_model = user_friendly_name_list[0]
     scores = {}
+    full_chosen_model_name = reverse_user_friendly_name[chosen_model]
     for row in model_array:
-        name = user_friendly_name[row["name"]]
-        tokenizer = row["tokenizer"]
-        model = row["model"]
-        model_input = tokenizer(*([s1], [s2]), padding=True, return_tensors="pt")
-        with torch.no_grad():
-            output = model(**model_input)
-            score = output[0][0].argmax().item()
-            scores[name] = score
-    assin2_scores = {k: v for k, v in scores.items() if "ASSIN 2" in k}
-    average_score = most_frequent(assin2_scores.values())
-    description = score_descriptions[average_score]
-    description_pt = score_descriptions_pt[average_score]
-
-    if 2 in scores.values():
-        description = description + "\n" + score_descriptions[2]
-        description_pt = description_pt + "\n" + score_descriptions_pt[2]
-    final_description = description + "\n \n" + description_pt
-
-    for key, value in scores.items():
-        scores[key] = score_short_keys[value]
-
-    return final_description, scores
+        name = row["name"]
+        if name != full_chosen_model_name:
+            continue
+        else:
+            tokenizer = row["tokenizer"]
+            model = row["model"]
+            model_input = tokenizer(*([s1], [s2]), padding=True, return_tensors="pt")
+            with torch.no_grad():
+                output = model(**model_input)
+                logits = output[0][0].detach().numpy()
+                logits = softmax(logits).tolist()
+                break
+    def get_description(idx):
+        description = score_descriptions[idx]
+        description_pt = score_descriptions_pt[idx]
+        final_description = description + "\n \n" + description_pt
+        return final_description
+
+    scores = { get_description(k):v for k,v in enumerate(logits) }
+
+    return scores
 
 
 inputs = [
     gr.inputs.Textbox(label="Premise"),
-    gr.inputs.Textbox(label="Hypothesis")
+    gr.inputs.Textbox(label="Hypothesis"),
+    gr.Dropdown(label="Model", choices=user_friendly_name_list, default=user_friendly_name_list[0])
 ]
 
 outputs = [
-    gr.Textbox(label="Evaluation", value=output_textbox_component_description),
-    gr.JSON(label="Results by model", value=output_json_component_description)
+    gr.Label(label="Result")
 ]
 
 
-gr.Interface(fn=entailment, inputs=inputs, outputs=outputs, title=app_title,
+gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title=app_title,
              description=app_description,
              examples=app_examples,
              article = article_string).launch()
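
In sum, this commit swaps the old entailment function, which ran every loaded model and reported the most frequent label among the ASSIN 2 models plus a per-model JSON breakdown, for a predict function that runs only the checkpoint picked in a new Model dropdown and returns softmax probabilities keyed by bilingual class descriptions, which gr.Label renders as ranked confidences. A minimal standalone sketch of that new inference path; the premise/hypothesis strings below are invented example inputs, while the checkpoint name is one of the user_friendly_name keys visible in the diff:

    # Standalone sketch of the new predict() path, outside Gradio.
    # The premise/hypothesis strings are made-up examples.
    import torch
    from scipy.special import softmax
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    checkpoint = "ruanchaves/bert-large-portuguese-cased-assin-entailment"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    premise = "Os homens estão cozinhando."
    hypothesis = "Os homens estão preparando comida."

    # Same call shape as the diff: the starred form pairs the two sentences.
    model_input = tokenizer(*([premise], [hypothesis]), padding=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(**model_input)[0][0].detach().numpy()

    # softmax turns raw logits into class probabilities; gr.Label expects a
    # {label: confidence} dict, which is why predict() now returns `scores`.
    probs = softmax(logits).tolist()
    for idx, p in enumerate(probs):
        print(idx, round(p, 3))

Returning the full probability distribution instead of a single argmax label is what makes the gr.Label output useful: it shows how confident the selected model is in each class rather than only the winning label.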
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 torch
 gradio
-transformers
+transformers
+scipy
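
The new scipy entry backs the `from scipy.special import softmax` import added at the top of app.py. The unchanged `transformers` line appearing as a remove-and-re-add pair most likely reflects a trailing newline being added to the file rather than any content change.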