# annalyzin's picture
# Update app.py
# ec2682f
# raw
# history blame
# 1.72 kB
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM
model_checkpoint = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
mask_token = tokenizer.mask_token
text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
target_word = "懒惰"
def eval_prob(target_word, text):
# Get index of target_word
idx = tokenizer.encode(target_word)[2]
# Get logits
inputs = tokenizer(text, return_tensors="pt")
token_logits = model(**inputs).logits
# Find the location of the MASK and extract its logits
mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
mask_token_logits = token_logits[0, mask_token_index, :]
# Convert logits to softmax probability
logits = mask_token_logits[0].tolist()
probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
# Get probability of target word filling the MASK
result = float(probs[idx])
return round(result, 5)
gr.Interface(
fn=eval_prob,
inputs="text",
outputs="text",
title="Chinese Sentence Grading",
).launch()
# Plot bar chart of probs x target_words to find optimal cutoff
# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
# def predict(image):
# predictions = pipeline(image)
# return {p["label"]: p["score"] for p in predictions}
# gr.Interface(
# predict,
# inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
# outputs=gr.outputs.Label(num_top_classes=2),
# title="Hot Dog? Or Not?",
# ).launch()