File size: 3,453 Bytes
aa805a6
 
 
 
 
 
 
 
 
 
 
 
 
 
5c5eade
 
 
 
aa805a6
 
5c5eade
aa805a6
 
3017b00
5c5eade
 
 
 
 
3017b00
 
 
5c5eade
aa805a6
5c5eade
 
 
aa805a6
 
 
5c5eade
aa805a6
b837ff1
 
 
 
 
 
 
3017b00
5c5eade
aa805a6
5c5eade
 
 
3017b00
 
 
 
 
 
5c5eade
 
419c509
5c5eade
 
 
 
 
3017b00
 
 
 
 
5c5eade
aa805a6
 
 
 
 
5c5eade
 
 
aa805a6
5c5eade
 
ed86205
 
 
 
 
5c5eade
 
ae2fa97
 
5c5eade
 
 
aa805a6
5c5eade
 
 
 
aa805a6
5c5eade
 
aa805a6
3017b00
ae2fa97
 
aa805a6
 
5c5eade
aa805a6
 
ed86205
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import streamlit as st
from huggingface_hub import InferenceApi
import pandas as pd
from transformers import pipeline

# Custom CSS for the page; setup() injects it with
# st.markdown(STYLE, unsafe_allow_html=True).
# NOTE(review): the `td { font-size: }` rule below has no value, so it
# presumably has no effect - confirm the intended size or drop the rule.
STYLE = """
<style>
img {
    max-width: 100%;
}

th {
    text-align: left!important
}

td {
    font-size:
}
</style>
"""

# Placeholder the user's sentence must contain; main() shows an error when
# the input does not include it.
MASK_TOKEN = "<mask>"

# Emoji appended to each classifier label in the results table; main() looks
# a label up here with EMOJI_MAP[label], so every label a classifier can
# return needs an entry.
# NOTE(review): these values were restored from mis-encoded (mojibake) text.
# The "love" and "neutral" glyphs were reconstructed from truncated byte
# sequences - confirm they match the intended emoji.
EMOJI_MAP = {
    "anger": "😡",
    "fear": "😱",
    "happy": "😄",
    "love": "😍",
    "sadness": "😭",
    "positive": "🤗",
    "negative": "😤",
    "neutral": "😐",
}


def display_table(
    df: "pd.DataFrame | pd.io.formats.style.Styler", subheader: str
) -> None:
    """Render a titled static table on the Streamlit page.

    Args:
        df: Tabular data handed to ``st.table``. The callers in this file
            pass the Styler objects returned by ``DataFrame.style``.
        subheader: Heading text written with ``st.subheader`` above the table.
    """
    st.subheader(subheader)
    st.table(df)


def setup():
    """Write the static page chrome: custom CSS, title/intro and sidebar header.

    The CSS in ``STYLE`` is raw HTML, so it is emitted with
    ``unsafe_allow_html=True``; the title block is ordinary Markdown.
    """
    st.markdown(STYLE, unsafe_allow_html=True)
    st.markdown(
        """
        # 🇮🇩 Indonesian RoBERTa Base

        Demo Powered by [Indonesian RoBERTa Base](https://huggingface.co/flax-community/indonesian-roberta-base).
        """
    )
    st.sidebar.subheader("Settings")


def _left_aligned(df, columns):
    """Return a Styler for *df* with the given columns left-aligned."""
    return df.style.set_properties(subset=columns, **{"text-align": "left"})


def _classify_sequences(sequences, classifier):
    """Classify every sequence and return a sequence/label/score DataFrame.

    Only the first prediction returned by *classifier* for each sequence is
    used, and its label is suffixed with the matching emoji from EMOJI_MAP.
    """
    rows = []
    for sequence in sequences:
        prediction = classifier(sequence)[0]
        label = prediction["label"]
        rows.append(
            {
                "sequence": sequence,
                "label": label + " " + EMOJI_MAP[label],
                "score": prediction["score"],
            }
        )
    # Passing `columns` keeps the three headers even when there are no rows.
    return pd.DataFrame(rows, columns=["sequence", "label", "score"])


def main():
    """Run the demo page.

    Flow: draw the page chrome and the two input widgets, validate that the
    sentence contains ``MASK_TOKEN``, ask the hosted Inference API to fill the
    mask, show those predictions, then run the selected classifier on every
    predicted sentence and show the resulting labels.

    Returns early (after telling the user why, where applicable) when the
    input is empty, lacks the mask token, or the Inference API gave no
    usable result.
    """
    setup()

    analyze = st.sidebar.selectbox(
        "What should we analyze?",
        ("Emotion", "Sentiment"),
        help="Classifier model to choose for text analysis",
    )

    user_input = st.text_input(
        f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
        value=f"Gila! Hari ini aku {MASK_TOKEN} banget..",
    )

    if not user_input:
        return

    if MASK_TOKEN not in user_input:
        st.error(
            f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
        )
        return

    # render masked language modeling table
    mask_api = InferenceApi("flax-community/indonesian-roberta-base")
    mlm_result = mask_api(inputs=user_input)

    if mlm_result is None:
        st.write("Model is loading. Please try again later...")
        return

    # The hosted API reports failures (for example a model that is still
    # loading) as a JSON object with an "error" key instead of a list of
    # predictions; building a DataFrame from that object would raise.
    # NOTE(review): confirm the payload shape for the huggingface_hub version in use.
    if isinstance(mlm_result, dict) and "error" in mlm_result:
        st.error(f"Inference API error: {mlm_result['error']}")
        return

    mlm_df = pd.DataFrame(mlm_result).drop(columns=["token", "token_str"])
    display_table(
        _left_aligned(mlm_df, ["sequence", "score"]), "🎈 Top 5 Predictions"
    )

    # render sentiment analysis table
    sa_models = {
        "Emotion": "StevenLimcorn/indonesian-roberta-base-emotion-classifier",
        "Sentiment": "w11wo/indonesian-roberta-base-sentiment-classifier",
    }
    sa_model = sa_models[analyze]
    # Built only once there is something to classify, so empty or invalid
    # input does not pay for constructing the classifier.
    sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)

    # predict for every mlm output
    sa_df = _classify_sequences(mlm_df["sequence"].values, sa_pipeline)
    display_table(
        _left_aligned(sa_df, ["sequence", "label", "score"]),
        "🤔 By saying that, I guess you are feeling..",
    )


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()