Spaces:

flax-community
/

roberta-indonesian

Runtime error

File size: 3,453 Bytes

aa805a6
 
 
 
 
 
 
 
 
 
 
 
 
 
5c5eade
 
 
 
aa805a6
 
5c5eade
aa805a6
 
3017b00
5c5eade
 
 
 
 
3017b00
 
 
5c5eade
aa805a6
5c5eade
 
 
aa805a6
 
 
5c5eade
aa805a6
b837ff1
 
 
 
 
 
 
3017b00
5c5eade
aa805a6
5c5eade
 
 
3017b00
 
 
 
 
 
5c5eade
 
419c509
5c5eade
 
 
 
 
3017b00
 
 
 
 
5c5eade
aa805a6
 
 
 
 
5c5eade
 
 
aa805a6
5c5eade
 
ed86205
 
 
 
 
5c5eade
 
ae2fa97
 
5c5eade
 
 
aa805a6
5c5eade
 
 
 
aa805a6
5c5eade
 
aa805a6
3017b00
ae2fa97
 
aa805a6
 
5c5eade
aa805a6
 
ed86205

import streamlit as st
from huggingface_hub import InferenceApi
import pandas as pd
from transformers import pipeline

# Raw CSS/HTML snippet that setup() injects into the page with
# st.markdown(..., unsafe_allow_html=True).
# NOTE(review): the `td { font-size: }` declaration below has no value, so it
# is presumably ignored by browsers -- confirm the intended size or drop it.
STYLE = """
<style>
img {
    max-width: 100%;
}

th {
    text-align: left!important
}

td {
    font-size:
}
</style>
"""

# Placeholder the user's sentence must contain; main() shows an error when the
# input does not include it.
MASK_TOKEN = "<mask>"

# Emoji appended to each classifier label in main(). The keys are looked up by
# the label string the classifier returns -- presumably these cover every label
# of the emotion and sentiment models; verify against the model configs.
EMOJI_MAP = {
    "anger": "😡",
    "fear": "😱",
    "happy": "😄",
    "love": "😍",
    "sadness": "😭",
    "positive": "🤗",
    "negative": "😤",
    "neutral": "😐",
}


def display_table(
    df: "pd.DataFrame | pd.io.formats.style.Styler", subheader: str
) -> None:
    """Render a titled static table on the Streamlit page.

    Args:
        df: Tabular data to show. The callers in this file pass the ``Styler``
            returned by ``DataFrame.style.set_properties(...)``, so both plain
            frames and stylers are accepted (the value is handed to
            ``st.table`` unchanged).
        subheader: Heading text displayed above the table.
    """
    st.subheader(subheader)
    st.table(df)


def setup():
    """Draw the static page chrome: custom CSS, the title blurb and the sidebar heading."""
    # Kept as a local so the three rendering calls below read as a flat list.
    intro_markdown = """
        # 🇮🇩 Indonesian RoBERTa Base

        Demo Powered by [Indonesian RoBERTa Base](https://huggingface.co/flax-community/indonesian-roberta-base).
        """

    # STYLE is raw HTML, hence unsafe_allow_html.
    st.markdown(STYLE, unsafe_allow_html=True)
    st.markdown(intro_markdown)
    st.sidebar.subheader("Settings")


def _classify_sequences(sequences, classifier) -> pd.DataFrame:
    """Run ``classifier`` on each sequence and tabulate its first prediction.

    Rows are gathered in a list and the frame is built once at the end;
    ``DataFrame.append`` (used previously) no longer exists in pandas >= 2.0.
    """
    rows = []
    for sequence in sequences:
        top_prediction = classifier(sequence)[0]
        rows.append({"sequence": sequence, **top_prediction})
    return pd.DataFrame(rows, columns=["sequence", "label", "score"])


def main():
    """Run the Streamlit demo.

    Flow:
      1. Draw the page chrome and read the sidebar choice and the sentence.
      2. Require the sentence to contain ``MASK_TOKEN``; otherwise show an
         error and stop.
      3. Ask the hosted fill-mask model for completions and show them.
      4. Classify every completed sentence with the selected classifier and
         show the labels decorated with an emoji from ``EMOJI_MAP``.

    Returns early (rendering a message where appropriate) when the input is
    empty, lacks the mask token, or the fill-mask call yields no usable result.
    """
    setup()

    # Display name -> classifier checkpoint. Also drives the selectbox options,
    # so the lookup below can never miss.
    sa_models = {
        "Emotion": "StevenLimcorn/indonesian-roberta-base-emotion-classifier",
        "Sentiment": "w11wo/indonesian-roberta-base-sentiment-classifier",
    }

    analyze = st.sidebar.selectbox(
        "What should we analyze?",
        tuple(sa_models),
        help="Classifier model to choose for text analysis",
    )

    user_input = st.text_input(
        f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
        value=f"Gila! Hari ini aku {MASK_TOKEN} banget..",
    )

    if not user_input:
        return

    if MASK_TOKEN not in user_input:
        st.error(
            f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
        )
        return

    # render masked language modeling table
    mask_api = InferenceApi("flax-community/indonesian-roberta-base")
    mlm_result = mask_api(inputs=user_input)

    if mlm_result is None:
        st.write("Model is loading. Please try again later...")
        return

    # NOTE(review): the hosted API is expected to answer failures (including
    # "model is loading") with a dict carrying an "error" key -- confirm
    # against the huggingface_hub docs. Feeding that dict to pd.DataFrame
    # would raise, so report it instead.
    if isinstance(mlm_result, dict) and "error" in mlm_result:
        st.error(f"Masked language model request failed: {mlm_result['error']}")
        return

    # Only the completed sentence and its score are displayed.
    mlm_df = pd.DataFrame(mlm_result).drop(
        columns=["token", "token_str"], errors="ignore"
    )
    mlm_df_styled = mlm_df.style.set_properties(
        subset=["sequence", "score"], **{"text-align": "left"}
    )
    display_table(mlm_df_styled, "🎈 Top 5 Predictions")

    # render sentiment analysis table
    # The classifier is loaded only now, so empty/invalid input and failed
    # fill-mask calls do not pay for loading it.
    sa_model = sa_models[analyze]
    sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)

    sa_df = _classify_sequences(mlm_df["sequence"].values, sa_pipeline)

    # Unknown labels are shown without an emoji instead of raising KeyError.
    sa_df["label"] = sa_df["label"].apply(
        lambda label: f"{label} {EMOJI_MAP.get(label, '')}".strip()
    )
    sa_df_styled = sa_df.style.set_properties(
        subset=["sequence", "label", "score"], **{"text-align": "left"}
    )
    display_table(sa_df_styled, "🤔 By saying that, I guess you are feeling..")


# Script entry point: build the page only when this file is executed as
# __main__, not when it is imported as a module.
if __name__ == "__main__":
    main()