ruanchaves committed on
Commit
8eeeaa9
1 Parent(s): 47979d5

text simplification

Browse files
Files changed (2) hide show
  1. app.py +34 -22
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  from collections import Counter
 
5
 
6
  article_string = "Author: <a href=\"https://huggingface.co/ruanchaves\">Ruan Chaves Rodrigues</a>. Read more about our <a href=\"https://github.com/ruanchaves/eplm\">research on the evaluation of Portuguese language models</a>."
7
 
@@ -56,6 +57,10 @@ user_friendly_name = {
56
  "ruanchaves/bert-base-portuguese-cased-porsimplessent": "BERTimbau base (PorSimplesSent)",
57
  }
58
 
 
 
 
 
59
  model_array = []
60
 
61
  for model_name in model_list:
@@ -69,36 +74,43 @@ def most_frequent(array):
69
  occurence_count = Counter(array)
70
  return occurence_count.most_common(1)[0][0]
71
 
72
- def predict(s1, s2):
 
 
73
  scores = {}
 
74
  for row in model_array:
75
- name = user_friendly_name[row["name"]]
76
- tokenizer = row["tokenizer"]
77
- model = row["model"]
78
- model_input = tokenizer(*([s1], [s2]), padding=True, return_tensors="pt")
79
- with torch.no_grad():
80
- output = model(**model_input)
81
- score = output[0][0].argmax().item()
82
- scores[name] = score
83
- average_score = most_frequent(list(scores.values()))
84
- description = score_descriptions[average_score]
85
- description_pt = score_descriptions_pt[average_score]
86
- final_description = description + "\n \n" + description_pt
87
-
88
- for key, value in scores.items():
89
- scores[key] = score_descriptions[value]
90
-
91
- return final_description, scores
 
 
 
 
92
 
93
 
94
  inputs = [
95
- gr.inputs.Textbox(label="Sentence A"),
96
- gr.inputs.Textbox(label="Sentence B")
 
97
  ]
98
 
99
  outputs = [
100
- gr.Textbox(label="Evaluation", value=output_textbox_component_description),
101
- gr.JSON(label="Results by model", value=output_json_component_description)
102
  ]
103
 
104
 
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  from collections import Counter
5
+ from scipy.special import softmax
6
 
7
  article_string = "Author: <a href=\"https://huggingface.co/ruanchaves\">Ruan Chaves Rodrigues</a>. Read more about our <a href=\"https://github.com/ruanchaves/eplm\">research on the evaluation of Portuguese language models</a>."
8
 
 
57
  "ruanchaves/bert-base-portuguese-cased-porsimplessent": "BERTimbau base (PorSimplesSent)",
58
  }
59
 
60
+ reverse_user_friendly_name = { v:k for k,v in user_friendly_name.items() }
61
+
62
+ user_friendly_name_list = list(user_friendly_name.values())
63
+
64
  model_array = []
65
 
66
  for model_name in model_list:
 
74
  occurence_count = Counter(array)
75
  return occurence_count.most_common(1)[0][0]
76
 
77
+ def predict(s1, s2, chosen_model):
78
+ if not chosen_model:
79
+ chosen_model = user_friendly_name_list[0]
80
  scores = {}
81
+ full_chosen_model_name = reverse_user_friendly_name[chosen_model]
82
  for row in model_array:
83
+ name = row["name"]
84
+ if name != full_chosen_model_name:
85
+ continue
86
+ else:
87
+ tokenizer = row["tokenizer"]
88
+ model = row["model"]
89
+ model_input = tokenizer(*([s1], [s2]), padding=True, return_tensors="pt")
90
+ with torch.no_grad():
91
+ output = model(**model_input)
92
+ logits = output[0][0].detach().numpy()
93
+ logits = softmax(logits).tolist()
94
+ break
95
+ def get_description(idx):
96
+ description = score_descriptions[idx]
97
+ description_pt = score_descriptions_pt[idx]
98
+ final_description = description + "\n \n" + description_pt
99
+ return final_description
100
+
101
+ scores = { get_description(k):v for k,v in enumerate(logits) }
102
+
103
+ return scores
104
 
105
 
106
  inputs = [
107
+ gr.inputs.Textbox(label="Question"),
108
+ gr.inputs.Textbox(label="Answer"),
109
+ gr.Dropdown(label="Model", choices=user_friendly_name_list, default=user_friendly_name_list[0])
110
  ]
111
 
112
  outputs = [
113
+ gr.Label(label="Result")
 
114
  ]
115
 
116
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  torch
2
  gradio
3
- transformers
 
 
1
  torch
2
  gradio
3
+ transformers
4
+ scipy