Spaces:

annalyzin
/

chinese_sentence_grading

Sleeping

App Files Files Community

Annalyn Ng committed on May 6, 2023

Commit

c129047

•

1 Parent(s): dfaaad7

update app to chinese sentence grading

Browse files

Files changed (1) hide show

app.py +51 -10

app.py CHANGED Viewed

@@ -1,15 +1,56 @@
 import gradio as gr
-from transformers import pipeline
-pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
-def predict(image):
-    predictions = pipeline(image)
-    return {p["label"]: p["score"] for p in predictions}
 gr.Interface(
-    predict,
-    inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
-    outputs=gr.outputs.Label(num_top_classes=2),
-    title="Hot Dog? Or Not?",
-).launch(share=True)

 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForMaskedLM
+# Hugging Face checkpoint name used to load both the tokenizer and the
+# masked-language model below.
+model_checkpoint = "xlm-roberta-base"
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
+# The tokenizer's mask placeholder string, interpolated into the sample sentence.
+mask_token = tokenizer.mask_token
+# Sample sentence (mask token placed where the word under test belongs) and a
+# sample candidate word.
+# NOTE(review): neither value is passed to anything in the visible code;
+# eval_prob's parameters of the same names shadow them. Presumably left over
+# from local experiments; confirm before removing.
+text = f"雨天，我整个人就便{mask_token}了，不想出外，甚至不想去上课。"
+target_word = "懒惰"
+def eval_prob(target_word, text):
+    # Get index of target_word
+    idx = tokenizer.encode(target_word)[2]
+    # Get logits
+    inputs = tokenizer(text, return_tensors="pt")
+    token_logits = model(**inputs).logits
+    # Find the location of the MASK and extract its logits
+    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
+    mask_token_logits = token_logits[0, mask_token_index, :]
+    # Convert logits to softmax probability
+    logits = mask_token_logits[0].tolist()
+    probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
+    # Get probability of target word filling the MASK
+    result = float(probs[idx])
+    return round(result, 5)
 gr.Interface(
+    fn=eval_prob,
+    inputs="text",
+    outputs="text",
+    title="Chinese Sentence Grading",
+).launch(share=True)
+# Plot bar chart of probs x target_words to find optimal cutoff
+# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
+# def predict(image):
+#     predictions = pipeline(image)
+#     return {p["label"]: p["score"] for p in predictions}
+# gr.Interface(
+#     predict,
+#     inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
+#     outputs=gr.outputs.Label(num_top_classes=2),
+#     title="Hot Dog? Or Not?",
+# ).launch()