import streamlit as st
from functions import *

# Page setup: browser-tab title and icon, the sidebar heading, and the
# heading rendered at the top of the main page.
st.set_page_config(page_title="Earnings Semantic Search", page_icon="🔎")
st.sidebar.header("Semantic Search")
st.markdown("## Earnings Semantic Search with SBert")

def gen_sentiment(text):
    '''Return the sentiment label predicted for ``text``.

    ``text`` is passed to ``sent_pipe`` and the ``'label'`` entry of the
    first prediction it returns is handed back to the caller.
    '''
    predictions = sent_pipe(text)
    top_prediction = predictions[0]
    return top_prediction['label']

# Name shown in the sidebar selectbox -> model identifier passed to the
# encoder loader and to the embedding search.
bi_enc_dict = {
    'mpnet-base-v2': "all-mpnet-base-v2",
    'e5-base': 'intfloat/e5-base',
    'instructor-base': 'hkunlp/instructor-base',
    'mpnet-base-dot-v1': 'multi-qa-mpnet-base-dot-v1',
    'setfit-finance': 'nickmuchi/setfit-finetuned-financial-text-classification',
}

# Free-text query, pre-filled with an example question.
search_input = st.text_input(
    label='Enter Your Search Query',
    value="What key challenges did the business face?",
    key='search',
)

# Which entry of bi_enc_dict to use for the search.
sbert_model_name = st.sidebar.selectbox(
    "Embedding Model",
    options=list(bi_enc_dict),
    key='sbox',
)

# Number of hits kept for display.
top_k = 2

# Forwarded as window_size when the transcript is split into passages.
window_size = st.sidebar.slider(
    "Number of Sentences Generated in Search Response",
    min_value=1,
    max_value=7,
    value=3,
)

def gen_annotated_text(df):
    '''Generate annotated text

    Convert every row of ``df`` into a ``(text, label, colour)`` tuple that
    can be handed to ``annotated_text``.

    Args:
        df: DataFrame with ``Text`` and ``Sentiment`` columns.

    Returns:
        A list with one tuple per row, in row order. ``'Positive'`` rows are
        coloured green, ``'Negative'`` rows red, and any other label black.
    '''
    colours = {'Positive': '#8fce00', 'Negative': '#f44336'}
    # Columns are read by name so a change in column order cannot silently
    # swap the text and the label.
    return [
        (row.Text, row.Sentiment, colours.get(row.Sentiment, '#000000'))
        for row in df.itertuples()
    ]


# Shown whenever the transcript data this page depends on is unavailable.
_missing_transcript_msg = 'Please ensure you have entered the YouTube URL or uploaded the Earnings Call file'

try:

    if not search_input:
        # An empty query is a different problem from a missing transcript,
        # so tell the user what actually needs fixing.
        st.write('Please enter a search query')

    elif "sen_df" not in st.session_state or "earnings_passages" not in st.session_state:
        st.write(_missing_transcript_msg)

    else:
        # Split the stored transcript into passages sized by the sidebar slider.
        passages = chunk_long_text(st.session_state['earnings_passages'],150,window_size=window_size)

        embedding_model = bi_enc_dict[sbert_model_name]

        with st.spinner(text=f"Loading {embedding_model} encoder model..."):
            # NOTE(review): the returned model is not used on this page;
            # presumably the call warms a cache used by embed_text - confirm
            # against functions.load_sbert.
            load_sbert(embedding_model)

        # embed_text returns the hits for the query; each hit is read below
        # through its 'cross-score' and 'corpus_id' entries.
        hits = embed_text(search_input,passages,embedding_model)
        top_hits = hits[0:int(top_k)]

        if not top_hits:
            # Nothing to rank or display (e.g. no passages were produced).
            st.write('No matching passages were found for this query')

        else:
            ## Save to a dataframe for ease of visualization
            score='cross-score'
            df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in top_hits],columns=['Score','Text'])
            df['Score'] = df['Score'].round(2)
            df['Sentiment'] = df.Text.apply(gen_sentiment)

            text_annotations = gen_annotated_text(df)

            with st.expander(label='Best Search Query Result', expanded=True):
                annotated_text(text_annotations[0])

            # A second result only exists when at least two passages were
            # returned; indexing unconditionally would raise IndexError.
            if len(text_annotations) > 1:
                with st.expander(label='Alternative Search Query Result'):
                    annotated_text(text_annotations[1])

except RuntimeError:

    st.write(_missing_transcript_msg)