Spaces:

annalyzin
/

chinese_sentence_grading

Sleeping

File size: 1,723 Bytes

59392ba
ec2682f
 
59392ba
c129047
ec2682f
c129047
ec2682f
 
 
c129047
ec2682f
 
c129047
ec2682f
 
 
b9d5dc1
ec2682f
 
 
c129047
ec2682f
 
 
c129047
ec2682f
 
 
c129047
ec2682f
 
c129047
ec2682f
59392ba
ec2682f
 
 
 
 
 
c129047

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM


# Name of the pretrained checkpoint shared by the tokenizer and the model.
model_checkpoint = "xlm-roberta-base"

# Load the tokenizer and the masked-language model once, at import time;
# eval_prob below reads both as module-level globals.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
# The tokenizer's mask placeholder string, used to mark the blank in a sentence.
mask_token = tokenizer.mask_token

# Example inputs: a sentence with one masked position and a candidate word for
# that position.
# NOTE(review): neither name is read by the code visible in this file;
# eval_prob's parameters of the same names shadow them.
text = f"雨天，我整个人就便{mask_token}了，不想出外，甚至不想去上课。"
target_word = "懒惰"

def eval_prob(target_word, text):
    """Return the model's probability that ``target_word`` fills the mask in ``text``.

    Args:
        target_word: Candidate word for the masked position. Only the token
            id at position 2 of its encoding is scored (see the note in the
            body).
        text: Sentence containing the tokenizer's mask token. If it occurs
            more than once, only the first occurrence is scored.

    Returns:
        The softmax probability of the target token at the masked position,
        rounded to 5 decimal places.

    Raises:
        ValueError: If ``target_word`` encodes to fewer than three token ids,
            or if ``text`` contains no mask token.
    """
    # NOTE(review): position 2 presumably skips a leading special token and a
    # sentencepiece prefix piece, leaving the first "real" piece of the word.
    # That depends on how the tokenizer splits the word; a word that encodes
    # to more pieces is scored by this one piece only -- confirm.
    target_ids = tokenizer.encode(target_word)
    if len(target_ids) < 3:
        raise ValueError(
            f"target_word {target_word!r} encodes to too few tokens to score"
        )
    target_id = target_ids[2]

    encoded = tokenizer(text, return_tensors="pt")

    # Column indices of every mask token in the (single-row) batch. Checked
    # before the forward pass so bad input fails fast with a clear message.
    mask_positions = torch.where(
        encoded["input_ids"] == tokenizer.mask_token_id
    )[1]
    if mask_positions.numel() == 0:
        raise ValueError(
            f"text must contain the mask token {tokenizer.mask_token!r}"
        )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        token_logits = model(**encoded).logits

    # Vocabulary distribution at the first masked position.
    probs = torch.softmax(token_logits[0, mask_positions[0]], dim=-1)

    return round(float(probs[target_id]), 5)

# eval_prob takes two arguments (target_word, text), so the interface needs
# one text input per argument, listed in the same order as the parameters.
gr.Interface(
    fn=eval_prob,
    inputs=["text", "text"],
    outputs="text",
    title="Chinese Sentence Grading",
).launch()

# TODO: Plot a bar chart of probabilities against target words to find the optimal cutoff.

# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")

# def predict(image):
#     predictions = pipeline(image)
#     return {p["label"]: p["score"] for p in predictions}

# gr.Interface(
#     predict,
#     inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
#     outputs=gr.outputs.Label(num_top_classes=2),
#     title="Hot Dog? Or Not?",
# ).launch()