Simon Salmon committed on
Commit
ca0b7b1
•
1 Parent(s): 74311cf

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from huggingface_hub import InferenceApi
3
+ import pandas as pd
4
+ from transformers import pipeline
5
+
6
# Custom CSS for the page; it is rendered through
# ``st.markdown(STYLE, unsafe_allow_html=True)``.
# NOTE(review): the ``td`` rule's ``font-size:`` declaration has no value in
# the extracted source -- confirm the intended size before relying on it.
STYLE = """
<style>
    img {
        max-width: 100%;
    }

    th {
        text-align: left!important
    }

    td {
        font-size:
    }
</style>
"""
21
+
22
# Placeholder the user must put in the sentence for the fill-mask model.
MASK_TOKEN = "<mask>"

# Emoji appended to each classifier label when results are displayed.
# The values were mojibake in the extracted source and are restored from
# their UTF-8 byte sequences.
# NOTE(review): "love" and "neutral" had lost a byte in extraction; the
# emoji below are inferred from the key -- confirm against the original file.
EMOJI_MAP = {
    "anger": "😡",
    "fear": "😱",
    "happy": "😄",
    "love": "😍",
    "sadness": "😭",
    "positive": "🤗",
    "negative": "😤",
    "neutral": "😐",
}
34
+
35
+
36
def display_table(df: pd.DataFrame, subheader: str):
    """Render a titled table on the Streamlit page.

    Args:
        df: Tabular data handed straight to ``st.table``. Callers in this
            file pass the styled object returned by
            ``DataFrame.style.set_properties``.
        subheader: Heading text shown above the table.
    """
    st.subheader(subheader)
    st.table(df)
39
+
40
+
41
def setup():
    """Draw the static page chrome: custom CSS, title blurb and sidebar header."""
    # STYLE is raw HTML (<style> ... </style>), hence unsafe_allow_html.
    st.markdown(STYLE, unsafe_allow_html=True)
    # The flag emoji in the heading was mojibake in the extracted source and
    # is restored from its UTF-8 byte sequence.
    st.markdown(
        """
        # 🇮🇩 Indonesian RoBERTa Base

        Demo Powered by [Indonesian RoBERTa Base](https://huggingface.co/flax-community/indonesian-roberta-base).
        """
    )
    st.sidebar.subheader("Settings")
50
+ st.sidebar.subheader("Settings")
51
+
52
+
53
def main():
    """Render the demo page: fill the mask token, then classify each completion.

    Flow:
      1. Draw the page chrome (``setup``), the analysis selector and the
         sentence input.
      2. Send the sentence to the hosted fill-mask model and show its
         predictions in a table.
      3. Run every predicted sentence through the selected classifier and
         show label (decorated with an emoji from ``EMOJI_MAP``) and score.

    Returns early, after informing the user where applicable, when the input
    is empty, does not contain ``MASK_TOKEN``, or the inference call yields no
    usable predictions.
    """
    setup()

    # A lookup table (rather than an if/elif chain) guarantees a model id is
    # bound for every option the select box can return.
    classifier_models = {
        "Emotion": "StevenLimcorn/indonesian-roberta-base-emotion-classifier",
        "Sentiment": "w11wo/indonesian-roberta-base-sentiment-classifier",
    }

    analyze = st.sidebar.selectbox(
        "What should we analyze?",
        tuple(classifier_models),
        help="Classifier model to choose for text analysis",
    )

    user_input = st.text_input(
        f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
        value=f"Gila! Hari ini aku {MASK_TOKEN} banget..",
    )

    # Nothing to predict yet.
    if not user_input:
        return

    if MASK_TOKEN not in user_input:
        st.error(
            f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
        )
        return

    # --- masked language modeling table ---
    mlm_model = "BigSalmon/BestMask2"
    mask_api = InferenceApi(mlm_model)
    mlm_result = mask_api(inputs=user_input)

    if mlm_result is None:
        st.write("Model is loading. Please try again later...")
        return
    if isinstance(mlm_result, dict) and "error" in mlm_result:
        # NOTE(review): presumably the hosted API reports failures (including
        # a model that is still loading) as {"error": ...} -- confirm against
        # the Inference API docs. Without this guard the DataFrame code below
        # would crash on such a payload.
        st.error(f"Inference API error: {mlm_result['error']}")
        return

    mlm_df = pd.DataFrame(mlm_result).drop(columns=["token", "token_str"])
    mlm_df_styled = mlm_df.style.set_properties(
        subset=["sequence", "score"], **{"text-align": "left"}
    )
    display_table(mlm_df_styled, "🎈 Top 5 Predictions")

    # --- sentiment / emotion analysis table ---
    # The classifier is loaded only now, so invalid input or a failed
    # fill-mask call never pays the model-loading cost.
    sa_model = classifier_models[analyze]
    sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)

    # Classify every fill-mask completion. Rows are collected first and the
    # frame is built once, because DataFrame.append was removed in pandas 2.0.
    rows = [
        {"sequence": sequence, **sa_pipeline(sequence)[0]}
        for sequence in mlm_df["sequence"]
    ]
    sa_df = pd.DataFrame(rows, columns=["sequence", "label", "score"])

    # Decorate labels with an emoji; labels missing from EMOJI_MAP are shown
    # as-is instead of raising KeyError.
    sa_df["label"] = sa_df["label"].apply(
        lambda label: f"{label} {EMOJI_MAP.get(label, '')}".rstrip()
    )
    sa_df_styled = sa_df.style.set_properties(
        subset=["sequence", "label", "score"], **{"text-align": "left"}
    )
    display_table(sa_df_styled, "🤔 By saying that, I guess you are feeling..")
115
+
116
# Script entry point: build the page only when executed directly, not on import.
if __name__ == "__main__":
    main()