w11wo committed on
Commit
5c5eade
•
1 Parent(s): 4558ffa

improved demo

Browse files
Files changed (1) hide show
  1. app.py +51 -25
app.py CHANGED
@@ -12,52 +12,78 @@ img {
12
  th {
13
  text-align: left!important
14
  }
 
 
 
 
15
  </style>
16
  """
 
17
  MASK_TOKEN = "<mask>"
18
 
 
 
 
 
 
 
 
19
 
20
- def display_table(df):
21
- st.subheader("Top 5 Prediction.")
22
- df.drop(columns=["token", "token_str"], inplace=True)
23
- df = df.style.set_properties(subset=["sequence", "score"], **{"text-align": "left"})
24
  st.table(df)
25
 
26
 
27
- def main():
28
  st.markdown(STYLE, unsafe_allow_html=True)
29
- st.title("Indonesian RoBERTa Base")
30
- user_input = st.text_input("Insert a sentence to predict with a mask token: <mask>")
31
- mask_api = InferenceApi("flax-community/indonesian-roberta-base")
32
 
33
- emot_name = "StevenLimcorn/indonesian-roberta-base-emotion-classifier"
34
- emot_pipeline = pipeline("sentiment-analysis", model=emot_name, tokenizer=emot_name)
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  if len(user_input) > 0:
37
  try:
38
  user_input.index(MASK_TOKEN)
39
  except ValueError:
40
- st.error("Please enter a sentence with the correct mask token: <mask>")
 
 
41
  else:
42
- # A List of dict with keys: sequence, score, token, token_str
43
- result = mask_api(inputs=user_input)
44
- df = pd.DataFrame(result)
45
- display_table(df)
46
-
47
- # emot
48
- st.subheader("Emotion Analysis of the Top 5 Prediction")
49
- emot_df = pd.DataFrame(columns=["sequence", "label", "score"])
50
- for sequence in df["sequence"].values:
51
- emot_output = emot_pipeline(sequence)
52
 
 
 
 
 
53
  result_dict = {"sequence": sequence}
54
- result_dict.update(emot_output[0])
55
- emot_df = emot_df.append(result_dict, ignore_index=True)
56
 
57
- emot_df = emot_df.style.set_properties(
 
58
  subset=["sequence", "label", "score"], **{"text-align": "left"}
59
  )
60
- st.table(emot_df)
61
 
62
 
63
  main()
12
  th {
13
  text-align: left!important
14
  }
15
+
16
+ td {
17
+ font-size:
18
+ }
19
  </style>
20
  """
21
+
22
  MASK_TOKEN = "<mask>"
23
 
24
+ EMOTION_MAP = {
25
+ "anger": "😡",
26
+ "fear": "😱",
27
+ "happy": "😄",
28
+ "love": "😍",
29
+ "sadness": "😭",
30
+ }
31
 
32
+
33
+ def display_table(df: pd.DataFrame, subheader: str):
34
+ st.subheader(subheader)
 
35
  st.table(df)
36
 
37
 
38
+ def setup():
39
  st.markdown(STYLE, unsafe_allow_html=True)
40
+ st.title("🇮🇩 Indonesian RoBERTa Base")
41
+
 
42
 
43
+ def main():
44
+ setup()
45
+
46
+ user_input = st.text_input(
47
+ f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
48
+ value=f"Aduh... gimana nih.. hari ini {MASK_TOKEN} banget...",
49
+ )
50
+
51
+ mlm_model = "flax-community/indonesian-roberta-base"
52
+ mask_api = InferenceApi(mlm_model)
53
+
54
+ sa_model = "StevenLimcorn/indonesian-roberta-base-emotion-classifier"
55
+ sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)
56
 
57
  if len(user_input) > 0:
58
  try:
59
  user_input.index(MASK_TOKEN)
60
  except ValueError:
61
+ st.error(
62
+ f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
63
+ )
64
  else:
65
+ # render masked language modeling table
66
+ mlm_result = mask_api(inputs=user_input)
67
+ mlm_df = pd.DataFrame(mlm_result)
68
+ mlm_df.drop(columns=["token", "token_str"], inplace=True)
69
+ mlm_df_styled = mlm_df.style.set_properties(
70
+ subset=["sequence", "score"], **{"text-align": "left"}
71
+ )
72
+ display_table(mlm_df_styled, "🎈 Top 5 Predictions")
 
 
73
 
74
+ # render sentiment analysis table
75
+ sa_df = pd.DataFrame(columns=["sequence", "label", "score"])
76
+ for sequence in mlm_df["sequence"].values:
77
+ sa_output = sa_pipeline(sequence) # predict for every mlm output
78
  result_dict = {"sequence": sequence}
79
+ result_dict.update(sa_output[0])
80
+ sa_df = sa_df.append(result_dict, ignore_index=True)
81
 
82
+ sa_df["label"] = sa_df["label"].apply(lambda x: x + " " + EMOTION_MAP[x])
83
+ sa_df_styled = sa_df.style.set_properties(
84
  subset=["sequence", "label", "score"], **{"text-align": "left"}
85
  )
86
+ display_table(sa_df_styled, "🤔 By saying that, I guess you are feeling..")
87
 
88
 
89
  main()