File size: 1,926 Bytes
aa805a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import streamlit as st
from huggingface_hub import InferenceApi
import pandas as pd
from transformers import pipeline

# CSS snippet that main() injects into the page via st.markdown(..., unsafe_allow_html=True):
# images are capped at the container width and table header cells are left-aligned.
STYLE = """
<style>
img {
    max-width: 100%;
}

th {
    text-align: left!important
}
</style>
"""
# Literal placeholder the user's sentence must contain; main() shows an error
# and skips prediction when it is absent from the input.
MASK_TOKEN = "<mask>"


def display_table(df):
    """Render the fill-mask predictions as a left-aligned Streamlit table.

    Args:
        df: DataFrame of predictions. It must contain the columns
            ``sequence``, ``score``, ``token`` and ``token_str``; the two
            token columns are hidden from the rendered table.

    The caller's DataFrame is left untouched: columns are dropped on a copy
    rather than in place, so the caller can keep using ``df`` afterwards
    without depending on which columns this function removed.
    """
    st.subheader("Top 5 Prediction.")
    # Non-inplace drop returns a new frame instead of mutating the argument.
    visible = df.drop(columns=["token", "token_str"])
    styled = visible.style.set_properties(
        subset=["sequence", "score"], **{"text-align": "left"}
    )
    st.table(styled)


def main():
    """Render the demo page: mask-filling predictions plus emotion analysis.

    Flow:
      1. Inject the page CSS, show the title and the text input.
      2. If the input is empty, stop (only the input box is rendered).
      3. If the input lacks the mask token, show an error and stop.
      4. Query the hosted fill-mask model and show its predictions.
      5. Run the emotion classifier on each predicted sequence and show
         the resulting labels and scores in a second table.

    The remote client and the local emotion pipeline are only created after
    the input has been validated, so reruns with empty or invalid input do
    not pay the model-loading cost.
    """
    st.markdown(STYLE, unsafe_allow_html=True)
    st.title("Indonesian RoBERTa Base")
    user_input = st.text_input("Insert a sentence to predict with a mask token: <mask>")

    # Nothing typed yet: render just the input box.
    if not user_input:
        return

    if MASK_TOKEN not in user_input:
        st.error("Please enter a sentence with the correct mask token: <mask>")
        return

    mask_api = InferenceApi("flax-community/indonesian-roberta-base")
    # On success: a list of dicts with keys sequence, score, token, token_str.
    result = mask_api(inputs=user_input)

    # On failure (e.g. the hosted model is still loading) the API answers with
    # a dict carrying an "error" key; feeding that to pd.DataFrame would raise.
    if isinstance(result, dict) and "error" in result:
        st.error(f"Mask prediction failed: {result['error']}")
        return

    df = pd.DataFrame(result)
    # Capture the sequences before handing the frame to display_table so this
    # function does not depend on what display_table does to its argument.
    sequences = df["sequence"].tolist()
    display_table(df)

    # emot
    st.subheader("Emotion Analysis of the Top 5 Prediction")
    emot_name = "StevenLimcorn/indonesian-roberta-base-emotion-classifier"
    emot_pipeline = pipeline("sentiment-analysis", model=emot_name, tokenizer=emot_name)

    # Build all rows first and create the frame once: DataFrame.append was
    # removed in pandas 2.0 (and copied the whole frame on every call before).
    rows = [
        {"sequence": sequence, **emot_pipeline(sequence)[0]}
        for sequence in sequences
    ]
    emot_df = pd.DataFrame(rows, columns=["sequence", "label", "score"])

    styled = emot_df.style.set_properties(
        subset=["sequence", "label", "score"], **{"text-align": "left"}
    )
    st.table(styled)


# Run the app only when this file is executed as a script (as `streamlit run`
# does), so importing the module does not trigger network calls or model loads.
if __name__ == "__main__":
    main()