Annalyn Ng committed on
Commit
3302270
1 Parent(s): 1d09c47

add barplot

Browse files
Files changed (2) hide show
  1. app.py +41 -38
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import gradio as gr
 
2
  import pandas as pd
 
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForMaskedLM
5
 
@@ -18,8 +20,9 @@ def add_mask(target_word, text):
18
 
19
  def eval_prob(target_word, text):
20
  text_mask = add_mask(target_word, text)
 
21
  # Get index of target_word
22
- idx = tokenizer.encode(target_word)[2]
23
 
24
  # Get logits
25
  inputs = tokenizer(text_mask, return_tensors="pt")
@@ -34,54 +37,54 @@ def eval_prob(target_word, text):
34
  probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
35
 
36
  # Get probability of target word filling the MASK
37
- result = float(probs[idx])
 
 
38
 
39
- return result
40
 
 
 
41
 
42
- # test barplot
43
- simple = pd.DataFrame(
44
- {
45
- "item": ["A", "B", "C", "D", "E", "F", "G", "H", "I"],
46
- "inventory": [28, 55, 43, 91, 81, 53, 19, 87, 52],
47
- }
48
- )
49
 
50
- css = (
51
- "footer {display: none !important;} .gradio-container {min-height: 0px !important;}"
52
- )
53
 
54
- with gr.Blocks(css=css) as demo:
55
- gr.BarPlot(value=simple, x="item", y="inventory", title="Simple Bar Plot").style(
56
- container=False,
 
 
 
 
 
 
 
 
 
 
57
  )
58
- demo.launch(share=True)
 
 
59
 
60
 
61
  gr.Interface(
62
- fn=eval_prob,
63
  inputs=[
64
- gr.Textbox(label="词语", placeholder="夸大"),
65
- gr.Textbox(label="造句", placeholder=f"我们使用生成式人工智能已经很长时间了,所以最近的媒体报道可能被夸大了。"),
66
  ],
67
  examples=[
68
- ["夸大", "我们使用生成式人工智能已经很长时间了,所以最近的媒体报道可能被夸大了。"],
 
 
69
  ],
70
- outputs="number",
71
  title="Chinese Sentence Grading",
72
- ).launch(share=True)
73
-
74
- # Plot bar chart of probs x target_words to find optimal cutoff
75
-
76
- # pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
77
-
78
- # def predict(image):
79
- # predictions = pipeline(image)
80
- # return {p["label"]: p["score"] for p in predictions}
81
-
82
- # gr.Interface(
83
- # predict,
84
- # inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
85
- # outputs=gr.outputs.Label(num_top_classes=2),
86
- # title="Hot Dog? Or Not?",
87
- # ).launch()
 
1
  import gradio as gr
2
+ import numpy as np
3
  import pandas as pd
4
+ import plotly.express as px
5
  import torch
6
  from transformers import AutoTokenizer, AutoModelForMaskedLM
7
 
 
20
 
21
  def eval_prob(target_word, text):
22
  text_mask = add_mask(target_word, text)
23
+
24
  # Get index of target_word
25
+ target_idx = tokenizer.encode(target_word)[2]
26
 
27
  # Get logits
28
  inputs = tokenizer(text_mask, return_tensors="pt")
 
37
  probs = torch.nn.functional.softmax(torch.tensor([logits]), dim=1)[0]
38
 
39
  # Get probability of target word filling the MASK
40
+ # result = float(probs[target_idx])
41
+
42
+ return probs, target_idx
43
 
 
44
 
45
def plot_results(target_word, text):
    """Build a bar chart of the most probable tokens for the masked sentence.

    Calls ``eval_prob(target_word, text)`` to obtain the probability tensor
    and the vocabulary id of the target, ranks the tokens by descending
    probability, and plots the top entries with the target token highlighted.

    Args:
        target_word: The word being graded (forwarded to ``eval_prob``).
        text: The sentence containing the word (forwarded to ``eval_prob``).

    Returns:
        The Plotly figure produced by ``px.bar``. When the target token is
        not among the plotted rows, no bar is highlighted.
    """
    max_bars = 100  # only the highest-scoring tokens are plotted

    probs, target_idx = eval_prob(target_word, text)

    # Sort once and reuse both the values and the indices.
    ranked = torch.sort(probs, descending=True)

    # Convert to plain Python numbers so pandas/plotly get numeric columns
    # rather than tensor objects, and decode only the tokens that are shown.
    top_ids = ranked.indices[:max_bars].tolist()
    top_scores = ranked.values[:max_bars].tolist()
    words = [tokenizer.decode(token_id) for token_id in top_ids]

    # Flag the target by vocabulary id instead of searching the decoded
    # strings for target_word: list.index raises ValueError whenever no
    # single decoded token equals the word exactly.
    # NOTE(review): assumes eval_prob returns target_idx as an integer
    # vocabulary id -- confirm against eval_prob.
    target_flags = [int(token_id == target_idx) for token_id in top_ids]

    # Consolidate results in a dataframe
    df = pd.DataFrame(
        {
            "word": words,
            "score": top_scores,
            "target": target_flags,
        }
    )

    # Plot
    fig = px.bar(
        df,
        x="word",
        y="score",
        color="target",
        color_continuous_scale=px.colors.sequential.Bluered,
    )
    # fig.show() is intentionally not called: this function is used as a
    # Gradio callback, and the returned figure is what gets rendered.
    return fig
75
 
76
 
77
  gr.Interface(
78
+ fn=plot_results,
79
  inputs=[
80
+ gr.Textbox(label="词语", placeholder="标准"),
81
+ gr.Textbox(label="造句", placeholder="小明朗读课文时发音标准,被老师评为优秀。"),
82
  ],
83
  examples=[
84
+ ["聪明", "小明很聪明,每年考班上第一名。"],
85
+ ["尴尬", "小明去朋友的生日庆祝会,忘了带礼物,感到很尴尬。"],
86
+ ["标准", "小明朗读课文时发音标准,被老师评为优秀。"],
87
  ],
88
+ outputs=["plot"],
89
  title="Chinese Sentence Grading",
90
+ ).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  --extra-index-url https://download.pytorch.org/whl/cu113
2
  torch
3
  transformers
4
- pandas
 
 
 
1
  --extra-index-url https://download.pytorch.org/whl/cu113
2
  torch
3
  transformers
4
+ numpy
5
+ pandas
6
+ plotly.express