# Quang Duong

"""
@author: Tan Quang Duong
"""


import streamlit as st
import hydralit_components as hc
from hydralit_components import HyLoader, Loaders
import pandas as pd
import numpy as np
from sklearn import metrics
from utils import (
    inference_from_pytorch,
    plot_confusion_matric,
    plot_donut_sentiment_percentage,
    create_classification_report,
    get_100_random_test_review,
)
from PIL import Image


# Load the logo shown at the top of the sidebar. The path is relative, so it
# is resolved against the working directory the app is launched from.
app_logo = Image.open("./figs/AI-driven-Solutions.png")

# Configure the browser tab (title, icon) and use the wide page layout.
# NOTE(review): Streamlit has historically required set_page_config to be the
# first Streamlit command of a script run - keep it ahead of any other st.* call.
st.set_page_config(page_title="Review Sentiment Analysis", page_icon="🚀", layout="wide")
# Sidebar branding: the logo followed by a centred, grey author heading.
st.sidebar.image(app_logo, use_column_width=True)
st.sidebar.markdown(
    "<h1 style='text-align: center; color: grey;'> Quang Duong </h1>",
    # Needed so the inline-styled <h1> is rendered as HTML; the string is a
    # constant, no user input is interpolated into it.
    unsafe_allow_html=True,
)

# Primary menu definition for the top navigation bar: a single entry whose id
# ("tab1") is what the page body later compares `menu_id` against.
menu_data = [{"id": "tab1", "icon": "😊😒", "label": "Review Sentiment Analysis"}]

# Colour overrides handed to the nav bar.
# NOTE(review): "txc_active"/"txc_inactive" look like the text colours of the
# selected / unselected entries - confirm against hydralit_components.
over_theme = {
    "menu_background": "#7BB657",
    "txc_active": "#000000",
    "txc_inactive": "#FFFFFF",
}
# Render the navigation bar and keep its return value; the rest of the page
# is only drawn when it equals "tab1".
menu_id = hc.nav_bar(
    menu_definition=menu_data,
    override_theme=over_theme,
    # home_name='Home',
    # login_name='Logout',
    hide_streamlit_markers=False,  # will show the st hamburger as well as the navbar now!
    sticky_nav=True,  # at the top or not
    sticky_mode="pinned",  # jumpy or not-jumpy, but sticky or pinned
)

# Fetch the shared artefacts from st.session_state (the message below tells
# the user they are loaded on the Home page). The rest of this page needs all
# three of them, so when any is missing - e.g. the page was opened directly -
# show the hint and halt this script run. Previously only a missing tokenizer
# was reported and execution carried on, which ended in a NameError on
# `tokenizer`, `model` or `df_test` as soon as one of them was used.
missing_state_keys = [
    key for key in ("tokenizer", "model", "df_imdb_test") if key not in st.session_state
]
if missing_state_keys:
    st.write(
        "Please come back to Home page for loading tokenizer, model and dataset from Hugging Face hub."
    )
    # st.stop() ends the current run; nothing below this line is executed.
    st.stop()

tokenizer = st.session_state["tokenizer"]
model = st.session_state["model"]
# test split of the IMDB dataset, sampled from in "Review streaming" mode
df_test = st.session_state["df_imdb_test"]

# Flag remembering across reruns whether a 100-review sample has been drawn;
# initialised once so later reads of the key never fail.
if "is_df_test_100_loaded" not in st.session_state:
    st.session_state["is_df_test_100_loaded"] = False

# Lookup tables between numeric class ids and sentiment names. The reverse
# table is derived from the forward one so the two can never drift apart.
id2label = dict(enumerate(("NEGATIVE", "POSITIVE")))
label2id = {name: class_id for class_id, name in id2label.items()}

# Page body. Everything is drawn inside the HyLoader context so the loader
# animation is shown while the (potentially slow) inference below is running.
with HyLoader("", loader_name=Loaders.pulse_bars):
    if menu_id == "tab1":
        input_mode = st.radio(
            "**Select input mode** 👇",
            ("Review streaming", "Add review manually"),
            horizontal=True,
        )

        # Batch mode: draw a random sample of test reviews, then classify it.
        if input_mode == "Review streaming":
            if st.button("Simulate streaming 100 random reviews"):
                # get 100 random reviews as dataframe df_test_100
                df_test_100 = get_100_random_test_review(df_test)
                # Keep the sample in session state: a button only reports True
                # on the run triggered by its click, so the sample would be
                # gone by the time "Inference" is pressed otherwise.
                st.session_state["df_test_100"] = df_test_100

                # display 100 random reviews
                st.dataframe(df_test_100, use_container_width=True)
                st.session_state["is_df_test_100_loaded"] = True

            if st.session_state["is_df_test_100_loaded"]:
                if st.button("Inference"):
                    # make prediction on 100 reviews
                    df_test_100_loaded = st.session_state["df_test_100"]

                    # Run the model ONCE per review and split the result
                    # afterwards. The previous code called
                    # inference_from_pytorch twice per row (once for the id,
                    # once for the label), doubling the inference time.
                    predictions = df_test_100_loaded["text"].apply(
                        lambda review: inference_from_pytorch(review, tokenizer, model)
                    )
                    df_test_100_loaded["predicted_class_id"] = predictions.map(
                        lambda prediction: prediction[0]
                    )
                    df_test_100_loaded["predicted_class"] = predictions.map(
                        lambda prediction: prediction[1]
                    )

                    st.write("Sentiment analysis completed! Here is the result: 👇")
                    # display dataframe
                    st.dataframe(df_test_100_loaded, use_container_width=True)

                    # Count predictions per class for the donut chart.
                    class_count = df_test_100_loaded.predicted_class_id.value_counts()
                    df_pred_labels = pd.DataFrame(
                        {
                            "label": [id2label[x] for x in class_count.index.tolist()],
                            "count": class_count.values.tolist(),
                        }
                    )

                    # Confusion matrix of true vs. predicted class ids. The
                    # label set is pinned to the known class ids so the matrix
                    # keeps one row/column per class even when a class is
                    # absent from this particular sample.
                    conf_matrix = metrics.confusion_matrix(
                        df_test_100_loaded.class_id,
                        df_test_100_loaded.predicted_class_id,
                        labels=list(id2label),
                    )

                    # get classification report
                    df_report = create_classification_report(
                        df_test_100_loaded.class_id,
                        df_test_100_loaded.predicted_class_id,
                    )

                    col1, col2 = st.columns(2, gap="large")
                    with col1:
                        # plot donut chart for sentiment percentage
                        st.pyplot(plot_donut_sentiment_percentage(df_pred_labels))
                    with col2:
                        # plot confusion matrix
                        st.pyplot(plot_confusion_matric(conf_matrix))

                    # display classification report
                    st.dataframe(df_report, use_container_width=True)

        # Single-review mode: classify whatever the user typed.
        elif input_mode == "Add review manually":
            text_input = st.text_area("Type your review here:", height=200)
            # An empty text area is falsy, so nothing is predicted until the
            # user has entered some text.
            if text_input:
                predicted_label = inference_from_pytorch(text_input, tokenizer, model)[1]
                st.write(f"Predicted sentiment: **{predicted_label}**")