File size: 2,327 Bytes
ca0b7b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
from huggingface_hub import InferenceApi
import pandas as pd
from transformers import pipeline

# CSS injected into the page by setup() through st.markdown(...,
# unsafe_allow_html=True): caps image width at the container width and
# left-aligns table header cells.
# NOTE(review): the `td { font-size: }` declaration has no value, so it is
# presumably ignored by browsers — confirm the intended size or drop the rule.
STYLE = """
<style>
img {
    max-width: 100%;
}

th {
    text-align: left!important
}

td {
    font-size:
}
</style>
"""

# Placeholder that must appear in the user's sentence: main() interpolates it
# into the input label and default text, and rejects input that lacks it.
# NOTE(review): assumed to match the mask token expected by the model queried
# in main() — confirm against that model's tokenizer.
MASK_TOKEN = "<mask>"

# Maps a classifier label to the emoji shown next to it.
# The values are written as ASCII-only escapes: the previous literals had been
# corrupted into mojibake (UTF-8 bytes decoded with a legacy code page), and
# escapes survive that kind of round trip.
# NOTE(review): for "love" and "neutral" the last byte of the original emoji
# was lost in the corruption; the code points below are the best match for the
# label among the possible candidates — confirm.
EMOJI_MAP = {
    "anger": "\U0001F621",  # POUTING FACE
    "fear": "\U0001F631",  # FACE SCREAMING IN FEAR
    "happy": "\U0001F604",  # SMILING FACE WITH OPEN MOUTH AND SMILING EYES
    "love": "\U0001F60D",  # SMILING FACE WITH HEART-SHAPED EYES
    "sadness": "\U0001F62D",  # LOUDLY CRYING FACE
    "positive": "\U0001F917",  # HUGGING FACE
    "negative": "\U0001F624",  # FACE WITH LOOK OF TRIUMPH
    "neutral": "\U0001F610",  # NEUTRAL FACE
}


def display_table(df: pd.DataFrame, subheader: str):
    """Render a titled static table on the Streamlit page.

    Args:
        df: Table data handed to ``st.table``. Note that ``main()`` in this
            file passes the result of ``DataFrame.style.set_properties``
            rather than a plain DataFrame.
        subheader: Heading text shown above the table.
    """
    heading = subheader
    st.subheader(heading)
    st.table(data=df)


def setup() -> None:
    """Render the static page chrome: CSS, title, model link, sidebar heading.

    Injects ``STYLE`` as raw HTML, writes the page heading with a link to the
    model card, and adds the "Settings" subheader to the sidebar.
    """
    st.markdown(STYLE, unsafe_allow_html=True)
    # The heading starts with the regional-indicator pair I + D (rendered as a
    # flag). It is written as escapes because the previous literal had been
    # corrupted into mojibake.
    st.markdown(
        """
        # \U0001F1EE\U0001F1E9 Indonesian RoBERTa Base

        Demo Powered by [Indonesian RoBERTa Base](https://huggingface.co/flax-community/indonesian-roberta-base).
        """
    )
    st.sidebar.subheader("Settings")


def main() -> None:
    """Render the demo page and show fill-mask predictions for the input.

    Flow:
      1. Draw the page chrome and the sidebar selector.
      2. Read a sentence from the text input; do nothing if it is empty.
      3. Require ``MASK_TOKEN`` in the sentence, otherwise show an error.
      4. Send the sentence to the hosted model through ``InferenceApi`` and
         render the returned predictions as a table.
    """
    setup()

    # The selected value is not consumed anywhere in this function; the widget
    # is still rendered so the sidebar stays unchanged.
    st.sidebar.selectbox(
        "What should we analyze?",
        ("Emotion", "Sentiment"),
        help="Classifier model to choose for text analysis",
    )

    user_input = st.text_input(
        f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
        value=f"Gila! Hari ini aku {MASK_TOKEN} banget..",
    )

    if not user_input:
        return

    if MASK_TOKEN not in user_input:
        st.error(
            f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
        )
        return

    # Build the client only once the input is known to be usable, so invalid
    # input never touches the inference service.
    mlm_model = "BigSalmon/BestMask2"
    mask_api = InferenceApi(mlm_model)
    mlm_result = mask_api(inputs=user_input)

    if mlm_result is None:
        st.write("Model is loading. Please try again later...")
        return

    # NOTE(review): the hosted Inference API is understood to report failures
    # (including a model that is still loading) as a JSON object with an
    # "error" key. Building a DataFrame from that dict would raise, so surface
    # the message instead — confirm against the API's error format.
    if isinstance(mlm_result, dict) and "error" in mlm_result:
        st.error(f"Inference API error: {mlm_result['error']}")
        return

    # Render the masked-language-modeling table: keep only the completed
    # sentence and its score, left-aligned.
    mlm_df = pd.DataFrame(mlm_result).drop(columns=["token", "token_str"])
    mlm_df_styled = mlm_df.style.set_properties(
        subset=["sequence", "score"], **{"text-align": "left"}
    )
    # "\U0001F388" is the balloon emoji; written as an escape because the
    # previous literal had been corrupted into mojibake.
    display_table(mlm_df_styled, "\U0001F388 Top 5 Predictions")



# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()