File size: 4,504 Bytes
411678e
31b6e92
64af83f
 
 
411678e
ce57a20
64af83f
411678e
3b52176
4bed905
446f9c9
64af83f
 
 
16f2ce2
64af83f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16f2ce2
64af83f
 
 
 
 
 
 
 
e232116
446f9c9
abadefe
d98d50d
3b52176
e741287
3b52176
446f9c9
b38b575
d29d938
e694dea
64af83f
 
 
a957eeb
 
 
741aa8b
a957eeb
741aa8b
a957eeb
 
e232116
 
09e96c9
7b09818
64af83f
e3a147c
 
64af83f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09e96c9
e232116
a957eeb
09e96c9
a957eeb
e3a147c
7b09818
 
446f9c9
1cc24e6
e232116
c712f91
b26f818
e232116
016722b
e232116
 
a957eeb
e232116
a957eeb
09c79f1
e232116
a957eeb
e232116
a957eeb
e232116
 
a957eeb
e232116
 
 
a957eeb
 
 
 
741aa8b
 
a957eeb
 
741aa8b
82bf281
a957eeb
953c510
8eb51fc
ee6d004
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import html
import itertools

import streamlit as st
from langchain.chains import QAGenerationChain

from functions import *


# Page chrome. Streamlit requires set_page_config to run before any other
# Streamlit command on the page, so it stays first.
# The icon is a real emoji; the previous literal was the mis-decoded UTF-8
# bytes of the same character.
st.set_page_config(page_title="Earnings Question/Answering", page_icon="🔎")

st.sidebar.header("Semantic Search")

st.markdown("Earnings Semantic Search with LangChain, OpenAI & SBert")

# Global CSS injected as raw HTML:
#   * hides Streamlit's main menu and footer,
#   * defines the .css-card / .card-tag classes used by the question/answer
#     cards rendered in the sidebar further down,
#   * nudges a few layout elements (.css-zt5igj, .css-10trblm, .css-1kyxreq).
# NOTE(review): the css-xxxxx selectors look like auto-generated Streamlit
# class names and are presumably tied to a specific Streamlit version - confirm
# they still match after upgrades.
st.markdown(
    """
    <style>

    #MainMenu {visibility: hidden;
    }
        footer {visibility: hidden;
        }
        .css-card {
            border-radius: 0px;
            padding: 30px 10px 10px 10px;
            background-color: black;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            margin-bottom: 10px;
            font-family: "IBM Plex Sans", sans-serif;
        }

        .card-tag {
            border-radius: 0px;
            padding: 1px 5px 1px 5px;
            margin-bottom: 10px;
            position: absolute;
            left: 0px;
            top: 0px;
            font-size: 0.6rem;
            font-family: "IBM Plex Sans", sans-serif;
            color: white;
            background-color: green;
            }

        .css-zt5igj {left:0;
        }

        span.css-10trblm {margin-left:0;
        }

        div.css-1kyxreq {margin-top: -40px;
        }

    </style>
    """,
    unsafe_allow_html=True,
)

# Display name shown in the sidebar -> embedding model identifier that is
# handed to process_corpus further down.
bi_enc_dict = {
    'mpnet-base-v2': "all-mpnet-base-v2",
    'instructor-base': 'hkunlp/instructor-base',
}

# Free-text query box in the main page area, pre-filled with a sample question.
search_input = st.text_input(
    label='Enter Your Search Query',
    value="What key challenges did the business face?",
    key='search',
)

# Embedding model picker; the selected key is resolved through bi_enc_dict.
sbert_model_name = st.sidebar.selectbox(
    "Embedding Model",
    options=[*bi_enc_dict],
    key='sbox',
)

st.sidebar.markdown('Earnings QnA Generator')

# NOTE(review): neither value is referenced anywhere else in the visible
# script - presumably left over from an earlier text-splitting step; confirm
# before removing.
chunk_size, overlap_size = 1000, 50

# Shown whenever the transcript artefacts this page needs are unavailable.
MISSING_TRANSCRIPT_MSG = (
    'Please ensure you have entered the YouTube URL or uploaded the Earnings Call file'
)

# Session keys that must exist before the page can run. "sen_df" is not read
# here but the page has always been gated on it; "title" IS read below, so it
# is checked too instead of surfacing as an uncaught KeyError.
# NOTE(review): these keys are presumably populated by another page of the
# app - confirm against the code that writes them.
REQUIRED_STATE_KEYS = ("sen_df", "earnings_passages", "title")

try:

    if not search_input:

        # An empty query is a different problem from a missing transcript, so
        # tell the user what actually needs fixing.
        st.write('Please enter a search query')

    elif any(key not in st.session_state for key in REQUIRED_STATE_KEYS):

        st.write(MISSING_TRANSCRIPT_MSG)

    else:

        title = st.session_state['title']
        earnings_text = st.session_state['earnings_passages']

        # Generate question/answer pairs from the transcript and keep them in
        # the session.
        # NOTE(review): 10 and 3000 are presumably the number of pairs and a
        # chunk length - confirm against functions.generate_eval.
        st.session_state.eval_set = generate_eval(earnings_text, 10, 3000)

        # Display the question-answer pairs in the sidebar with smaller text.
        # The pairs are derived from external transcript content and rendered
        # with unsafe_allow_html=True, so escape them before interpolation.
        for number, qa_pair in enumerate(st.session_state.eval_set, start=1):
            question_html = html.escape(str(qa_pair['question']))
            answer_html = html.escape(str(qa_pair['answer']))
            st.sidebar.markdown(
                f"""
                <div class="css-card">
                <span class="card-tag">Question {number}</span>
                    <p style="font-size: 12px;">{question_html}</p>
                    <p style="font-size: 12px;">{answer_html}</p>
                </div>
                """,
                unsafe_allow_html=True,
            )

        embedding_model = bi_enc_dict[sbert_model_name]

        # Index the transcript with the chosen embedding model and answer the
        # query; both steps run under a single spinner.
        with st.spinner(
            text=f"Loading {embedding_model} embedding model and Generating Response..."
        ):
            docsearch = process_corpus(earnings_text, title, embedding_model)
            result = embed_text(search_input, docsearch)

        references = [doc.page_content for doc in result['source_documents']]
        answer = result['answer']

        sentiment_label = gen_sentiment(answer)

        ##### Semantic Search #####

        # Single-row frame (answer text + its sentiment label) in the shape
        # gen_annotated_text is called with; only its first element is shown.
        df = pd.DataFrame.from_dict({'Text': [answer], 'Sentiment': [sentiment_label]})
        text_annotations = gen_annotated_text(df)[0]

        with st.expander(label='Query Result', expanded=True):
            annotated_text(text_annotations)

        with st.expander(label='References from Corpus used to Generate Result'):
            for ref in references:
                st.write(ref)

except RuntimeError:

    # NOTE(review): the original treats any RuntimeError from the pipeline as
    # "no transcript available" - kept as is; confirm which call raises it.
    st.write(MISSING_TRANSCRIPT_MSG)