annalyzin committed on
Commit
ec2682f
1 Parent(s): bcea939

Update app.py

Files changed (1)
  app.py +29 -37
app.py CHANGED
@@ -1,52 +1,44 @@
 import gradio as gr
-
-def greet(name):
-    return "Hello " + name + "!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
-
-# import gradio as gr
-# import torch
-# from transformers import AutoTokenizer, AutoModelForMaskedLM
-
-
-# model_checkpoint = "xlm-roberta-base"
-
-# tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
-# model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
-# mask_token = tokenizer.mask_token
-
-# text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
-# target_word = "懒惰"
-
-# def eval_prob(target_word, text):
-#     # Get index of target_word
-#     idx = tokenizer.encode(target_word)[2]
-
-#     # Get logits
-#     inputs = tokenizer(text, return_tensors="pt")
-#     token_logits = model(**inputs).logits
-
-#     # Find the location of the MASK and extract its logits
-#     mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
-#     mask_token_logits = token_logits[0, mask_token_index, :]
-
-#     # Convert logits to softmax probability
-#     logits = mask_token_logits[0].tolist()
-#     probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
-
-#     # Get probability of target word filling the MASK
-#     result = float(probs[idx])
-
-#     return round(result, 5)
-
-# gr.Interface(
-#     fn=eval_prob,
-#     inputs="text",
-#     outputs="text",
-#     title="Chinese Sentence Grading",
-# ).launch()
-
+import torch
+from transformers import AutoTokenizer, AutoModelForMaskedLM
+
+
+model_checkpoint = "xlm-roberta-base"
+
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
+mask_token = tokenizer.mask_token
+
+text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
+target_word = "懒惰"
+
+def eval_prob(target_word, text):
+    # Get index of target_word
+    idx = tokenizer.encode(target_word)[2]
+
+    # Get logits
+    inputs = tokenizer(text, return_tensors="pt")
+    token_logits = model(**inputs).logits
+
+    # Find the location of the MASK and extract its logits
+    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
+    mask_token_logits = token_logits[0, mask_token_index, :]
+
+    # Convert logits to softmax probability
+    logits = mask_token_logits[0].tolist()
+    probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
+
+    # Get probability of target word filling the MASK
+    result = float(probs[idx])
+
+    return round(result, 5)
+
+gr.Interface(
+    fn=eval_prob,
+    inputs="text",
+    outputs="text",
+    title="Chinese Sentence Grading",
+).launch()
+
 # Plot bar chart of probs x target_words to find optimal cutoff
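For reference, the demo sentence in the diff means roughly "On rainy days I just become {mask}; I don't want to go out, or even go to class," and the target word 懒惰 means "lazy." Below is a minimal standalone sketch of the probability check this commit enables, runnable outside Gradio. The shortened sentence and the target_id lookup via add_special_tokens=False are illustrative substitutes, not part of the commit; app.py itself reads the id from tokenizer.encode(target_word)[2].

import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

checkpoint = "xlm-roberta-base"  # same checkpoint app.py uses
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForMaskedLM.from_pretrained(checkpoint)

# Illustrative inputs: "On rainy days I just become <mask>." / "lazy"
text = f"雨天,我整个人就变{tokenizer.mask_token}了。"
target_word = "懒惰"

# Vocabulary id of the candidate word, skipping the <s>/</s> specials.
# Like app.py's [2] indexing, this assumes the word maps to one token.
target_id = tokenizer.encode(target_word, add_special_tokens=False)[0]

inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():  # inference only, no gradients needed
    logits = model(**inputs).logits

# Softmax over the vocabulary at the <mask> position
mask_pos = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero()[0, 1]
probs = logits[0, mask_pos].softmax(dim=-1)
print(round(float(probs[target_id]), 5))  # probability that target_word fills the mask

Running this same check over several candidate target words is what the closing comment in app.py points at: plotting the probabilities against the candidates to pick a cutoff for grading.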