Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,52 +1,44 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
|
3 |
-
def greet(name):
|
4 |
-
return "Hello " + name + "!"
|
5 |
|
6 |
-
|
7 |
-
demo.launch()
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
|
|
|
|
|
13 |
|
14 |
-
|
|
|
|
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
# text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
|
21 |
-
# target_word = "懒惰"
|
22 |
-
|
23 |
-
# def eval_prob(target_word, text):
|
24 |
-
# # Get index of target_word
|
25 |
-
# idx = tokenizer.encode(target_word)[2]
|
26 |
-
|
27 |
-
# # Get logits
|
28 |
-
# inputs = tokenizer(text, return_tensors="pt")
|
29 |
-
# token_logits = model(**inputs).logits
|
30 |
|
31 |
-
#
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
#
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
#
|
40 |
-
|
41 |
|
42 |
-
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
|
51 |
# Plot bar chart of probs x target_words to find optimal cutoff
|
52 |
|
|
|
1 |
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
4 |
|
|
|
|
|
5 |
|
6 |
+
# Pretrained multilingual masked-language-model checkpoint used for scoring.
model_checkpoint = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
# The tokenizer's mask placeholder string (e.g. "<mask>"), spliced into sentences below.
mask_token = tokenizer.mask_token

# Example inputs: a Chinese sentence with one masked slot, and a candidate word
# ("lazy") whose fill-in probability we want to measure.
text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
target_word = "懒惰"
|
14 |
|
15 |
+
def eval_prob(target_word, text):
    """Return the model's probability that *target_word* fills the mask in *text*.

    Args:
        target_word: Candidate word for the masked slot. Assumes index 2 of
            ``tokenizer.encode(target_word)`` is the word's vocabulary id
            (``[<s>, "▁", piece, ..., </s>]`` under XLM-R) — fragile for words
            that split into several pieces; TODO confirm for multi-piece words.
        text: Sentence containing the tokenizer's mask token.

    Returns:
        float: Softmax probability of the target word at the first mask
        position, rounded to 5 decimal places.

    Raises:
        ValueError: If ``text`` contains no mask token.
    """
    # Vocabulary id of the target word (see assumption in the docstring).
    idx = tokenizer.encode(target_word)[2]

    # Score the sentence; no_grad avoids building an autograd graph for inference.
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        token_logits = model(**inputs).logits

    # Find the location of the MASK and extract its logits.
    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
    if mask_token_index.numel() == 0:
        # Without this guard, mask_token_logits[0] fails with an opaque IndexError.
        raise ValueError(f"text must contain the mask token {tokenizer.mask_token!r}")
    mask_token_logits = token_logits[0, mask_token_index, :]

    # Convert the logits at the (first) mask position to probabilities.
    # Softmax directly on the tensor — no tensor -> list -> tensor round trip.
    probs = torch.nn.functional.softmax(mask_token_logits[0], dim=-1)

    # Probability of target_word filling the MASK.
    return round(float(probs[idx]), 5)
|
35 |
|
36 |
+
# Expose eval_prob as a simple web app: two text inputs, one text output.
demo = gr.Interface(
    fn=eval_prob,
    inputs="text",
    outputs="text",
    title="Chinese Sentence Grading",
)
demo.launch()
|
42 |
|
43 |
# TODO: plot a bar chart of probabilities vs. target_words to find an optimal cutoff
|
44 |
|