import html
import os
from typing import AnyStr

import nltk
import streamlit as st
import validators
from transformers import pipeline
from validators import ValidationFailure

from Summarizer import Summarizer


def main() -> None:
    """Render the Terms & Conditions Summarizer Streamlit page.

    Shows the introduction, lets the user pick a bundled sample text (or
    paste their own text / a URL), and on "Summarize" displays an abstractive
    summary followed by the input text with the extracted sentences
    highlighted.
    """
    # Streamlit re-runs this function on every interaction, so only invoke
    # the NLTK downloader when the tokenizer data is actually missing.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')

    samples_dir = './sample-terms-and-conditions/'
    sample_suffix = '.txt'

    st.markdown('# Terms & Conditions Summarizer :pencil:')
    st.markdown('Do you also always take the time out of your day to thoroughly read every word of the Terms & Conditions before signing up to an app like the responsible citizen that you are?  :thinking_face:<br>'
                'No?<br>'
                "Well don't worry, neither do we! That's why we created a <b>Terms & Conditions Summarization</b> algorithm!", unsafe_allow_html=True)
    st.markdown('Just copy-paste that pesky Terms & Conditions text or provide a URL to the text and let our fancy NLP algorithm do the rest!<br>'
                'You will see both an extractive summary (the most important sentences will be highlighted) and an abstractive summary (an actual summary)<br>'
                'The abstractive summary will give you an idea of what the key message of the document likely is :bulb:', unsafe_allow_html=True)
    st.markdown('<b>Want to find out more?</b> :brain:<br>'
                'For details about the extractive part :point_right: https://en.wikipedia.org/wiki/Latent_semantic_analysis<br>'
                'For details about the abstractive part :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr', unsafe_allow_html=True)

    # NOTE(review): st.cache is deprecated in newer Streamlit releases in
    # favour of st.cache_resource / st.cache_data - confirm the targeted
    # Streamlit version before migrating.
    @st.cache(allow_output_mutation=True,
              suppress_st_warning=True,
              show_spinner=False)
    def create_pipeline():
        """Build the Hugging Face summarization pipeline (cached by Streamlit)."""
        with st.spinner('Please wait for the model to load...'):
            terms_and_conditions_pipeline = pipeline(
                task='summarization',
                model='ml6team/distilbart-tos-summarizer-tosdr',
                tokenizer='ml6team/distilbart-tos-summarizer-tosdr'
            )
        return terms_and_conditions_pipeline

    def display_abstractive_summary(summary_sentences: list) -> None:
        """Render each abstractive summary sentence as a bullet point."""
        st.subheader("Abstractive Summary")
        st.markdown('#####')
        for sentence in summary_sentences:
            # The sentences are generated from user-supplied (possibly remote)
            # text, so escape them before rendering with HTML enabled.
            st.markdown(f"- {html.escape(str(sentence))}", unsafe_allow_html=True)

    def display_extractive_summary(terms_and_conditions_text: str, summary_sentences: list) -> None:
        """Render the input text with every extracted sentence highlighted.

        Args:
            terms_and_conditions_text: The raw text that was entered.
            summary_sentences: Sentences to highlight wherever they occur
                verbatim in the text.
        """
        st.subheader("Extractive Summary")
        st.markdown('#####')
        highlighted_text = html.escape(terms_and_conditions_text)
        for sentence in summary_sentences:
            escaped_sentence = html.escape(sentence)
            if not escaped_sentence:
                # str.replace('') would insert the markup between every character.
                continue
            highlighted_text = highlighted_text.replace(escaped_sentence,
                                                        f"<p>"
                                                        f"<span style='background-color: yellow'>{escaped_sentence}</span>"
                                                        f"</p>")
        # Convert line breaks once, after highlighting: this also covers an
        # empty sentence list and keeps multi-line sentences matchable.
        highlighted_text = highlighted_text.replace('\n', '<br/>')
        with st.container():
            st.write(f"<p>{highlighted_text}</p>", unsafe_allow_html=True)

    def is_valid_url(url: str) -> bool:
        """Return True when ``validators.url`` accepts *url*."""
        return not isinstance(validators.url(url), ValidationFailure)

    def list_all_filenames() -> list:
        """Return the sample file names (without ``.txt``), sorted.

        Returns an empty list when the samples directory does not exist.
        """
        try:
            entries = os.listdir(samples_dir)
        except FileNotFoundError:
            return []
        # Strip only the trailing suffix; sort because os.listdir order is arbitrary.
        return sorted(entry[:-len(sample_suffix)]
                      for entry in entries
                      if entry.endswith(sample_suffix))

    def fetch_file_contents(filename: str) -> str:
        """Read and return the sample text stored as ``<filename>.txt``.

        The name is used exactly as listed by ``list_all_filenames`` so the
        lookup also works on case-sensitive file systems.
        """
        sample_path = os.path.join(samples_dir, f'{filename}{sample_suffix}')
        with open(sample_path, 'r', encoding='utf-8') as f:
            return f.read()

    summarizer: Summarizer = Summarizer(create_pipeline())

    if 'tc_text' not in st.session_state:
        st.session_state['tc_text'] = ''

    if 'sentences_length' not in st.session_state:
        st.session_state['sentences_length'] = Summarizer.DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH

    if 'sample_choice' not in st.session_state:
        st.session_state['sample_choice'] = ''

    st.header("Input")

    sentences_length = st.number_input(
        label='Number of sentences to be extracted:',
        min_value=5,
        max_value=15,
        value=st.session_state.sentences_length
    )
    sample_choice = st.selectbox(
        'Choose a sample terms & conditions:',
        list_all_filenames())
    # The selectbox yields None when there are no samples to choose from.
    st.session_state.tc_text = fetch_file_contents(sample_choice) if sample_choice is not None else ''
    tc_text_input = st.text_area(
        value=st.session_state.tc_text,
        label='Terms & conditions content or specify an URL:',
        height=240
    )

    summarize_button = st.button(label='Summarize')

    # Tuples are used for the argument and the result of the cached function.
    # The hash_funcs map the listed types to a constant so that st.cache does
    # not try to hash them.
    @st.cache(suppress_st_warning=True,
              show_spinner=False,
              allow_output_mutation=True,
              hash_funcs={"torch.nn.parameter.Parameter": lambda _: None,
                          "tokenizers.Tokenizer": lambda _: None,
                          "tokenizers.AddedToken": lambda _: None,
                          })
    def abstractive_summary_from_cache(summary_sentences: tuple) -> tuple:
        """Return the abstractive summary of *summary_sentences* as a tuple."""
        with st.spinner('Summarizing the text is in progress...'):
            return tuple(summarizer.abstractive_summary(list(summary_sentences)))

    if not summarize_button:
        return

    # Surrounding whitespace (e.g. a pasted trailing newline) must not stop a
    # URL from being recognised as one.
    stripped_input = tc_text_input.strip()
    if not stripped_input:
        st.warning('Please provide the terms & conditions text or a URL to summarize.')
        return

    if is_valid_url(stripped_input):
        extract_summary_sentences = summarizer.extractive_summary_from_url(stripped_input, sentences_length)
    else:
        extract_summary_sentences = summarizer.extractive_summary_from_text(tc_text_input, sentences_length)

    abstract_summary_tuple = abstractive_summary_from_cache(tuple(extract_summary_sentences))

    display_abstractive_summary(list(abstract_summary_tuple))
    # NOTE(review): for URL input the URL string itself (not the fetched page
    # text) is shown here, so presumably nothing gets highlighted - confirm
    # whether the fetched text should be displayed instead.
    display_extractive_summary(tc_text_input, extract_summary_sentences)


# Script entry point: build the page only when this file is executed directly
# (e.g. via `streamlit run`), not when it is imported as a module.
if __name__ == '__main__':
    main()