File size: 1,723 Bytes
59392ba
ec2682f
 
59392ba
c129047
ec2682f
c129047
ec2682f
 
 
c129047
ec2682f
 
c129047
ec2682f
 
 
b9d5dc1
ec2682f
 
 
c129047
ec2682f
 
 
c129047
ec2682f
 
 
c129047
ec2682f
 
c129047
ec2682f
59392ba
ec2682f
 
 
 
 
 
c129047
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM


# Hugging Face checkpoint name shared by the tokenizer and the masked-LM model.
model_checkpoint = "xlm-roberta-base"

# Loaded once at import time; eval_prob() reads these module-level objects.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
# The tokenizer's mask placeholder string, interpolated into the sample text below.
mask_token = tokenizer.mask_token

# Sample sentence/word pair. Nothing visible in this file passes these to
# eval_prob(); its parameters of the same names shadow them.
# Rough translation (for reviewers): "On rainy days I just become <mask>; I
# don't want to go out, or even go to class." / target word: "lazy".
text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
target_word = "懒惰"

def eval_prob(target_word, text):
    """Return the model's probability that ``target_word`` fills the mask in ``text``.

    Only a single vocabulary id is scored, so a word that the tokenizer
    splits into several pieces is represented by one of its pieces (see the
    selection rule below). If ``text`` contains more than one mask token,
    the first one is used.

    Args:
        target_word: Candidate word for the masked position.
        text: Sentence containing the tokenizer's mask token.

    Returns:
        The softmax probability of the selected piece at the mask position,
        rounded to 5 decimal places.

    Raises:
        ValueError: If ``target_word`` yields no tokens or ``text`` has no
            mask token.
    """
    # Encode without the sequence-wrapping special tokens so that indexing
    # can never land on one of them by accident.
    piece_ids = tokenizer.encode(target_word, add_special_tokens=False)
    if not piece_ids:
        raise ValueError("target_word must contain at least one token")
    # Keep the historical choice of the second piece when there is one.
    # NOTE(review): presumably the first piece is often a standalone
    # SentencePiece word-start marker -- confirm. A single-piece word used to
    # be scored as the end-of-sequence token; score the word itself instead.
    idx = piece_ids[1] if len(piece_ids) > 1 else piece_ids[0]

    inputs = tokenizer(text, return_tensors="pt")

    # Column indices of every mask token in the (single-row) batch.
    mask_positions = torch.where(
        inputs["input_ids"] == tokenizer.mask_token_id
    )[1]
    if mask_positions.numel() == 0:
        raise ValueError(
            f"text must contain the mask token {tokenizer.mask_token!r}"
        )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        token_logits = model(**inputs).logits

    # Vocabulary logits at the first mask position, normalised to probabilities.
    mask_logits = token_logits[0, mask_positions[0], :]
    probs = torch.nn.functional.softmax(mask_logits, dim=-1)

    return round(float(probs[idx]), 5)

# Web UI for eval_prob. One text input per function parameter, in signature
# order (target_word, then the sentence containing the mask token); a single
# input would leave the second positional argument unfilled on every call.
gr.Interface(
    fn=eval_prob,
    inputs=["text", "text"],
    outputs="text",
    title="Chinese Sentence Grading",
).launch()

# TODO: plot a bar chart of probabilities per target word to find an optimal cutoff.

# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")

# def predict(image):
#     predictions = pipeline(image)
#     return {p["label"]: p["score"] for p in predictions}

# gr.Interface(
#     predict,
#     inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
#     outputs=gr.outputs.Label(num_top_classes=2),
#     title="Hot Dog? Or Not?",
# ).launch()