# Streamlit page: semantic search / question answering over an earnings call.
#
# The page reads the passages and title that were stored in
# ``st.session_state`` (under 'earnings_passages' and 'title'), generates a
# set of question/answer pairs for the sidebar, then answers the user's search
# query and shows the answer together with the corpus passages it was built
# from.
#
# Import order is kept as-is on purpose: ``from functions import *`` is a star
# import, so moving it relative to the other imports could change which
# object a shared name ends up bound to. The helpers used below
# (generate_eval, process_corpus, embed_text, gen_sentiment,
# gen_annotated_text, annotated_text) and ``pd`` are not imported explicitly
# here, so they are expected to come from that star import.
import streamlit as st
from functions import *
from langchain.chains import QAGenerationChain
import itertools

st.set_page_config(page_title="Earnings Question/Answering", page_icon="🔎")
st.sidebar.header("Semantic Search")
st.markdown("Earnings Semantic Search with LangChain, OpenAI & SBert")

st.markdown(
    """ """,
    unsafe_allow_html=True,
)

# Sidebar label -> embedding model identifier handed to process_corpus.
bi_enc_dict = {
    'mpnet-base-v2': "all-mpnet-base-v2",
    'instructor-base': 'hkunlp/instructor-base',
}

search_input = st.text_input(
    label='Enter Your Search Query',
    value="What key challenges did the business face?",
    key='search',
)

sbert_model_name = st.sidebar.selectbox(
    "Embedding Model",
    options=list(bi_enc_dict.keys()),
    key='sbox',
)

st.sidebar.markdown('Earnings QnA Generator')

# Not referenced anywhere in this section; kept as module-level names.
chunk_size = 1000
overlap_size = 50

# Single source for the message shown whenever the page cannot run a search.
MISSING_INPUT_MESSAGE = (
    'Please ensure you have entered the YouTube URL or uploaded the '
    'Earnings Call file'
)

# Session keys that must exist before searching. 'title' is read below, so it
# is checked here too: a missing key would raise KeyError, which the
# RuntimeError handler does not catch.
REQUIRED_STATE_KEYS = ("sen_df", "earnings_passages", "title")

try:
    inputs_ready = bool(search_input) and all(
        key in st.session_state for key in REQUIRED_STATE_KEYS
    )

    if inputs_ready:
        title = st.session_state['title']
        earnings_passages = st.session_state['earnings_passages']
        earnings_text = ','.join(earnings_passages)

        # NOTE(review): this runs on every script rerun; the meaning of the
        # 10 and 3000 arguments is defined by functions.generate_eval --
        # confirm there before tuning them.
        st.session_state.eval_set = generate_eval(earnings_text, 10, 3000)

        # Display the question-answer pairs in the sidebar.
        # NOTE(review): the question/answer text is interpolated unescaped
        # while unsafe_allow_html=True is set; consider escaping it if the
        # generated text can contain markup.
        for i, qa_pair in enumerate(st.session_state.eval_set):
            st.sidebar.markdown(
                f"\n\nQuestion {i + 1}\n\n{qa_pair['question']}\n\n{qa_pair['answer']}\n\n",
                unsafe_allow_html=True,
            )

        embedding_model = bi_enc_dict[sbert_model_name]

        # Only the corpus processing and answer generation run under the
        # spinner; rendering happens once it has finished.
        with st.spinner(
            text=f"Loading {embedding_model} embedding model and Generating Response..."
        ):
            docsearch = process_corpus(earnings_passages, title, embedding_model)
            result = embed_text(search_input, docsearch)

        references = [doc.page_content for doc in result['source_documents']]
        answer = result['answer']
        sentiment_label = gen_sentiment(answer)

        ##### Semantic Search #####
        df = pd.DataFrame.from_dict({'Text': [answer], 'Sentiment': [sentiment_label]})
        text_annotations = gen_annotated_text(df)[0]

        with st.expander(label='Query Result', expanded=True):
            annotated_text(text_annotations)

        with st.expander(label='References from Corpus used to Generate Result'):
            for ref in references:
                st.write(ref)
    else:
        # Covers both an empty query and missing session data; the original
        # showed the same message in either case.
        st.write(MISSING_INPUT_MESSAGE)

except RuntimeError:
    st.write(MISSING_INPUT_MESSAGE)