import logging
from datetime import datetime

import numpy as np
import pandas as pd
import streamlit as st
from simplet5 import SimpleT5
from stqdm import stqdm
from tensorflow.keras.models import load_model
from tensorflow.nn import softmax
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BertTokenizer,
    TFBertForSequenceClassification,
    pipeline,
)

from constants import sub_themes_dict

date = datetime.now().strftime(r"%Y-%m-%d")

# Mapping from the category classifier's output index to a human-readable label.
model_classes = {
    0: "Ads",
    1: "Apps",
    2: "Battery",
    3: "Charging",
    4: "Delivery",
    5: "Display",
    6: "FOS",
    7: "HW",
    8: "Order",
    9: "Refurb",
    10: "SD",
    11: "Setup",
    12: "Unknown",
    13: "WiFi",
}


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
# @st.cache_resource
def load_t5():
    model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
    tokenizer = AutoTokenizer.from_pretrained("t5-base")
    return model, tokenizer


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
# @st.cache_resource
def custom_model():
    return pipeline("summarization", model="my_awesome_sum/")


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
# @st.cache_resource
def convert_df(df):
    # IMPORTANT: Cache the conversion to prevent computation on every rerun.
    return df.to_csv(index=False).encode("utf-8")


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
# @st.cache_resource
def load_one_line_summarizer(model):
    return model.load_model("t5", "snrspeaks/t5-one-line-summary")


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
# @st.cache_resource
def classify_category():
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    new_model = load_model("model")
    return tokenizer, new_model


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
# @st.cache_resource
def classify_sub_theme():
    tokenizer = BertTokenizer.from_pretrained(
        "ashhadahsan/amazon-subtheme-bert-base-finetuned"
    )
    new_model = TFBertForSequenceClassification.from_pretrained(
        "ashhadahsan/amazon-subtheme-bert-base-finetuned"
    )
    return tokenizer, new_model


def classify_texts(text, tokenizer, model, label_map, spinner_label):
    """Tokenize the reviews, run a classifier, and map argmax logits to labels.

    Consolidates the classification block that was previously duplicated for
    every summarizer option.
    """
    tf_batch = tokenizer(
        text,
        max_length=128,
        padding=True,
        truncation=True,
        return_tensors="tf",
    )
    with st.spinner(text=f"identifying {spinner_label}"):
        tf_outputs = model(tf_batch)
    classes = []
    with st.spinner(text="creating output file"):
        for x in stqdm(range(len(text))):
            tf_o = softmax(tf_outputs["logits"][x], axis=-1)
            label = int(np.argmax(tf_o, axis=0))
            classes.append(label_map.get(label))
    return classes


def pad_to_length(values, length):
    """Pad a partial summary list with empty strings (e.g. after a cancel) so
    its length matches the dataframe."""
    return values + [""] * (length - len(values))


st.set_page_config(layout="wide", page_title="Amazon Review Summarizer")
st.title("Amazon Review Summarizer")

uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "xls", "csv"])
summarizer_option = st.selectbox(
    "Select Summarizer",
    ("Custom trained on the dataset", "t5-base", "t5-one-line-summary"),
)
col1, col2, col3 = st.columns([1, 1, 1])
with col1:
    summary_yes = st.checkbox("Summarization", value=False)
with col2:
    classification = st.checkbox("Classify Category", value=True)
with col3:
    sub_theme = st.checkbox("Sub theme classification", value=True)

ps = st.empty()

if st.button("Process", type="primary"):
    cancel_button = st.empty()
    cancel_button2 = st.empty()
    cancel_button3 = st.empty()
    if uploaded_file is not None:
        # split(".")[-1] yields a bare extension ("csv", not ".csv").
        extension = uploaded_file.name.split(".")[-1].lower()
        if extension in ["xls", "xlsx"]:
            df = pd.read_excel(uploaded_file, engine="openpyxl")
        if extension == "csv":
            df = pd.read_csv(uploaded_file)
        df.columns = [x.lower() for x in df.columns.values.tolist()]
        output = pd.DataFrame()
        try:
            text = df["text"].values.tolist()
            output["text"] = text
            if summarizer_option == "Custom trained on the dataset":
                if summary_yes:
                    model = custom_model()
                    summary = []
                    for x in stqdm(range(len(text))):
                        if cancel_button.button("Cancel", key=x):
                            break
                        try:
                            summary.append(
                                model(
                                    f"summarize: {text[x]}",
                                    max_length=50,
                                    early_stopping=True,
                                )[0]["summary_text"]
                            )
                        except Exception:
                            # Keep the row count aligned when a single review fails.
                            summary.append("")
                    output["summary"] = pad_to_length(summary, len(text))
                    del model
            elif summarizer_option == "t5-base":
                if summary_yes:
                    model, tokenizer = load_t5()
                    summary = []
                    for x in stqdm(range(len(text))):
                        if cancel_button2.button("Cancel", key=x):
                            break
                        tokens_input = tokenizer.encode(
                            "summarize: " + text[x],
                            return_tensors="pt",
                            max_length=tokenizer.model_max_length,
                            truncation=True,
                        )
                        summary_ids = model.generate(
                            tokens_input,
                            min_length=80,
                            max_length=150,
                            length_penalty=20,
                            num_beams=2,
                        )
                        summary.append(
                            tokenizer.decode(summary_ids[0], skip_special_tokens=True)
                        )
                    output["summary"] = pad_to_length(summary, len(text))
                    del model, tokenizer
            elif summarizer_option == "t5-one-line-summary":
                if summary_yes:
                    model = SimpleT5()
                    load_one_line_summarizer(model=model)
                    summary = []
                    for x in stqdm(range(len(text))):
                        if cancel_button3.button("Cancel", key=x):
                            break
                        try:
                            summary.append(model.predict(text[x])[0])
                        except Exception:
                            summary.append("")
                    output["summary"] = pad_to_length(summary, len(text))
                    del model

            # Classification and the download button were identical across the
            # three summarizer branches, so they run once here instead of being
            # repeated inside each branch.
            if classification:
                classification_token, classification_model = classify_category()
                output["category"] = classify_texts(
                    text,
                    classification_token,
                    classification_model,
                    model_classes,
                    "theme",
                )
                del classification_token, classification_model
            if sub_theme:
                classification_token, classification_model = classify_sub_theme()
                output["sub theme"] = classify_texts(
                    text,
                    classification_token,
                    classification_model,
                    sub_themes_dict,
                    "sub theme",
                )
                del classification_token, classification_model

            csv = convert_df(output)
            st.download_button(
                label="Download data as CSV",
                data=csv,
                file_name=f"{summarizer_option}_{date}_df.csv",
                mime="text/csv",
            )
        except KeyError:
            st.error(
                "Please make sure your data has a column named 'text'.",
                icon="🚨",
            )
            st.info("The text column must contain Amazon reviews.", icon="ℹ️")
        except Exception:
            logging.exception("An exception occurred")
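

# Usage sketch (assumption: the script is saved as app.py; any filename works
# with `streamlit run`):
#
#   streamlit run app.py
#
# The uploaded spreadsheet is expected to have a "text" column of Amazon
# reviews, e.g.:
#
#   text
#   "Battery drains within two hours of normal use."
#   "Setup was easy but the display flickers."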