annalyzin committed on
Commit
ec2682f
1 Parent(s): bcea939

Update app.py

Files changed (1)
  app.py +29 -37
app.py CHANGED
@@ -1,52 +1,44 @@
 import gradio as gr
-
-def greet(name):
-    return "Hello " + name + "!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
-
-# import gradio as gr
-# import torch
-# from transformers import AutoTokenizer, AutoModelForMaskedLM
-
-
-# model_checkpoint = "xlm-roberta-base"
-
-# tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
-# model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
-# mask_token = tokenizer.mask_token
-
-# text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
-# target_word = "懒惰"
-
-# def eval_prob(target_word, text):
-#     # Get index of target_word
-#     idx = tokenizer.encode(target_word)[2]
-
-#     # Get logits
-#     inputs = tokenizer(text, return_tensors="pt")
-#     token_logits = model(**inputs).logits
-
-#     # Find the location of the MASK and extract its logits
-#     mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
-#     mask_token_logits = token_logits[0, mask_token_index, :]
-
-#     # Convert logits to softmax probability
-#     logits = mask_token_logits[0].tolist()
-#     probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
-
-#     # Get probability of target word filling the MASK
-#     result = float(probs[idx])
-
-#     return round(result, 5)
-
-# gr.Interface(
-#     fn=eval_prob,
-#     inputs="text",
-#     outputs="text",
-#     title="Chinese Sentence Grading",
-# ).launch()
-
+import torch
+from transformers import AutoTokenizer, AutoModelForMaskedLM
+
+
+model_checkpoint = "xlm-roberta-base"
+
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
+mask_token = tokenizer.mask_token
+
+text = f"雨天,我整个人就便{mask_token}了,不想出外,甚至不想去上课。"
+target_word = "懒惰"
+
+def eval_prob(target_word, text):
+    # Get index of target_word
+    idx = tokenizer.encode(target_word)[2]
+
+    # Get logits
+    inputs = tokenizer(text, return_tensors="pt")
+    token_logits = model(**inputs).logits
+
+    # Find the location of the MASK and extract its logits
+    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
+    mask_token_logits = token_logits[0, mask_token_index, :]
+
+    # Convert logits to softmax probability
+    logits = mask_token_logits[0].tolist()
+    probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
+
+    # Get probability of target word filling the MASK
+    result = float(probs[idx])
+
+    return round(result, 5)
+
+gr.Interface(
+    fn=eval_prob,
+    inputs="text",
+    outputs="text",
+    title="Chinese Sentence Grading",
+).launch()
+
 # Plot bar chart of probs x target_words to find optimal cutoff
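For reference, the demo sentence in the diff means roughly "On rainy days I just become {mask}; I don't want to go out, or even go to class," and the target word 懒惰 means "lazy." Below is a minimal standalone sketch of the probability check this commit enables, runnable outside Gradio. The shortened sentence and the target_id lookup via add_special_tokens=False are illustrative substitutes, not part of the commit; app.py itself reads the id from tokenizer.encode(target_word)[2].

import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

checkpoint = "xlm-roberta-base"  # same checkpoint app.py uses
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForMaskedLM.from_pretrained(checkpoint)

# Illustrative inputs: "On rainy days I just become <mask>." / "lazy"
text = f"雨天,我整个人就变{tokenizer.mask_token}了。"
target_word = "懒惰"

# Vocabulary id of the candidate word, skipping the <s>/</s> specials.
# Like app.py's [2] indexing, this assumes the word maps to one token.
target_id = tokenizer.encode(target_word, add_special_tokens=False)[0]

inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():  # inference only, no gradients needed
    logits = model(**inputs).logits

# Softmax over the vocabulary at the <mask> position
mask_pos = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero()[0, 1]
probs = logits[0, mask_pos].softmax(dim=-1)
print(round(float(probs[target_id]), 5))  # probability that target_word fills the mask

Running this same check over several candidate target words is what the closing comment in app.py points at: plotting the probabilities against the candidates to pick a cutoff for grading.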