# set path
import glob, os, sys; 
sys.path.append('../utils')

#import needed libraries
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from utils.vulnerability_classifier import load_vulnerabilityClassifier, vulnerability_classification
import logging
logger = logging.getLogger(__name__)
from utils.config import get_classifier_params
from utils.preprocessing import paraLengthCheck
from io import BytesIO
import xlsxwriter
import plotly.express as px
from utils.vulnerability_classifier import label_dict


# Module-level configuration shared by the functions in this file.
# `classifier_identifier` is the name passed to get_classifier_params and is
# also used by app() to build the session-state key for the loaded model.
classifier_identifier = 'vulnerability'
# Settings for this classifier; this file reads the 'model_name' and
# 'threshold' entries from the returned mapping.
params  = get_classifier_params(classifier_identifier)

@st.cache_data
def to_excel(df, sectorlist):
    """Serialize ``df`` to an in-memory .xlsx workbook with dropdown validation.

    The frame is written to the sheet 'Sheet1' without its index. Every data
    row then gets list validation (a dropdown) on these columns:

    * column S: 'No', 'Yes' or 'Discard'
    * columns T, U, V, W and X: the entries of ``sectorlist`` plus 'Blank'

    Args:
        df: DataFrame to export.
        sectorlist: list of allowed values for columns T-X. It must be a
            ``list`` because it is concatenated with ``['Blank']``.

    Returns:
        bytes: the content of the generated workbook.
    """
    output = BytesIO()
    row_count = len(df)
    # Row 1 holds the header, so the data occupy rows 2 .. row_count + 1.
    last_row = row_count + 1
    sector_options = sectorlist + ['Blank']

    # The context manager finalizes the workbook on exit; ExcelWriter.save()
    # no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        worksheet = writer.sheets['Sheet1']

        # With no data rows there is nothing to validate (and 'S2:S1' would
        # be an inverted range).
        if row_count:
            worksheet.data_validation(
                'S2:S{}'.format(last_row),
                {'validate': 'list',
                 'source': ['No', 'Yes', 'Discard']})
            for column in ('T', 'U', 'V', 'W', 'X'):
                worksheet.data_validation(
                    '{0}2:{0}{1}'.format(column, last_row),
                    {'validate': 'list',
                     'source': sector_options})

    return output.getvalue()

def app():
    """Run the vulnerability classifier on the processed document, if any.

    When ``st.session_state`` holds a 'key0' entry, the classifier named by
    ``params['model_name']`` is loaded and stored in the session state, the
    'key0' data is classified with ``params['threshold']``, and the result is
    saved under 'key1'. Without 'key0' nothing happens.
    """
    with st.container():

        # Nothing to classify until a document has been processed
        if 'key0' not in st.session_state:
            return

        paragraphs = st.session_state.key0

        # Load the model and keep it in the session state
        # (presumably read from there by vulnerability_classification;
        # verify against utils.vulnerability_classifier)
        model = load_vulnerabilityClassifier(classifier_name=params['model_name'])
        state_key = '{}_classifier'.format(classifier_identifier)
        st.session_state[state_key] = model

        # Classify and publish the predictions under key1
        predictions = vulnerability_classification(
            haystack_doc=paragraphs,
            threshold=params['threshold'],
        )
        st.session_state.key1 = predictions


def vulnerability_display():
    """Render the vulnerable-groups summary: intro text, bar chart and table.

    Reads the dataframe stored in ``st.session_state['key0']`` and uses its
    'Vulnerability Label' column, whose cells are iterated as collections of
    label names. A paragraph counts as a reference to a vulnerable group when
    'Other' is not among its labels.

    Layout:
        * left column: paragraph and reference counts,
        * right column: bar chart with the number of occurrences per label
          (every label of ``label_dict`` is shown, absent ones with 0),
        * below: the rows that reference a vulnerable group.
    """
    from collections import Counter

    # NOTE(review): app() stores the classifier output under 'key1' while this
    # function reads 'key0' -- confirm that 'key0' carries the label column.
    df_vul = st.session_state['key0']

    # Boolean mask of paragraphs that mention a vulnerable group; computed
    # once and reused for both the counter and the table.
    is_reference = (df_vul['Vulnerability Label']
                    .apply(lambda x: 'Other' not in x)
                    .astype(bool))

    col1, col2 = st.columns([1,1])

    with col1:

        # Header
        st.subheader("Explore references to vulnerable groups:")

        # Text
        num_paragraphs = len(df_vul['Vulnerability Label'])
        num_references = is_reference.sum()

        st.markdown(f"""<div style="text-align: justify;"> The document contains a
                total of <span style="color: red;">{num_paragraphs}</span> paragraphs.
                We identified <span style="color: red;">{num_references}</span>
                references to vulnerable groups.</div>
                <br>
                In the bar chart on the right you can see the distribution of the different 
                groups defined. For a more detailed view in the text, see the paragraphs and 
                their respective labels in the table below.</div>""", unsafe_allow_html=True)

    with col2:

        ### Bar chart

        # All known labels, so that groups without any hit still appear
        df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label'])

        # Count how often each label appears across all paragraphs
        group_counts = Counter(
            label
            for labels in df_vul['Vulnerability Label']
            for label in labels
        )
        df_label_count = pd.DataFrame(list(group_counts.items()), columns=['Label', 'Count'])

        # Left merge keeps every known label; labels never seen get Count 0
        df_label_count = df_labels.merge(df_label_count, on='Label', how='left')
        df_label_count['Count'] = df_label_count['Count'].fillna(0).astype(int)

        # Bar chart
        fig = px.bar(df_label_count,
                     x='Label',
                     y='Count',
                     title='How many references have been found to each group?',
                     labels={'Count': 'Frequency'})

        # Show plot
        st.plotly_chart(fig, use_container_width=True)

    ### Table: only the paragraphs that reference a vulnerable group
    st.write(df_vul[is_reference])