# Hugging Face Space: Chinese sentence grading demo (page-status residue removed)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Masked-LM checkpoint used to score candidate words for the mask slot.
model_checkpoint = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)

# Example sentence with the model's mask token ("<mask>" for XLM-R) inserted,
# and an example candidate word for that slot.
mask_token = tokenizer.mask_token
text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
target_word = "懒惰"
def eval_prob(target_word, text):
    """Return the model's probability that `target_word` fills the mask in `text`.

    Parameters
    ----------
    target_word : str
        Candidate word for the mask slot.
    text : str
        Sentence containing the tokenizer's mask token.

    Returns
    -------
    float
        Softmax probability, rounded to 5 decimal places.

    NOTE(review): `encode(target_word)[2]` scores only one sub-token of the
    word (indices 0/1 are presumably <s> and a leading sentencepiece piece);
    multi-piece words are approximated by that single piece — confirm intended.
    """
    # Vocabulary id of the (first) sub-token of the target word.
    idx = tokenizer.encode(target_word)[2]
    # Forward pass; no gradients are needed for inference.
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        token_logits = model(**inputs).logits
    # Locate the mask position(s) in the input ids and pull their logits.
    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
    mask_token_logits = token_logits[0, mask_token_index, :]
    # Softmax directly over the first mask's logits (the original converted
    # the tensor to a list and back to a tensor for no benefit).
    probs = torch.nn.functional.softmax(mask_token_logits[0], dim=-1)
    # Probability assigned to the target word's sub-token.
    return round(float(probs[idx]), 5)
# Launch the demo UI. eval_prob takes TWO arguments (target_word, text), so
# the interface must supply two text components — the original single
# inputs="text" would fail with an argument-count error on submit.
gr.Interface(
    fn=eval_prob,
    inputs=["text", "text"],
    outputs="text",
    title="Chinese Sentence Grading",
).launch()
# TODO: Plot bar chart of probs x target_words to find optimal cutoff
# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
# def predict(image):
#     predictions = pipeline(image)
#     return {p["label"]: p["score"] for p in predictions}
# gr.Interface(
#     predict,
#     inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
#     outputs=gr.outputs.Label(num_top_classes=2),
#     title="Hot Dog? Or Not?",
# ).launch()