nickmuchi committed on
Commit a957eeb • 1 Parent(s): 648179d

Update pages/3_Earnings_Semantic_Search_🔎_.py

pages/3_Earnings_Semantic_Search_🔎_.py CHANGED
@@ -20,79 +20,85 @@ top_k = st.sidebar.slider("Number of Top Hits Generated",min_value=1,max_value=5

 window_size = st.sidebar.slider("Number of Sentences Generated in Search Response",min_value=1,max_value=7,value=3)

-if search_input:
-
-    if "sen_df" in st.session_state and "earnings_passages" in st.session_state:
-
-        ## Save to a dataframe for ease of visualization
-        sen_df = st.session_state['sen_df']
-
-        passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
-
-        with st.spinner(
-            text=f"Loading {sbert_model_name} encoder..."
-        ):
-            sbert = load_sbert(sbert_model_name)
-
-
-        ##### Sematic Search #####
-        # Encode the query using the bi-encoder and find potentially relevant passages
-        corpus_embeddings = sbert.encode(passages, convert_to_tensor=True, show_progress_bar=True)
-        question_embedding = sbert.encode(search_input, convert_to_tensor=True)
-        question_embedding = question_embedding.cpu()
-        hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k,score_function=util.dot_score)
-        hits = hits[0] # Get the hits for the first query
-
-        ##### Re-Ranking #####
-        # Now, score all retrieved passages with the cross_encoder
-        cross_inp = [[search_input, passages[hit['corpus_id']]] for hit in hits]
-        cross_scores = cross_encoder.predict(cross_inp)
-
-        # Sort results by the cross-encoder scores
-        for idx in range(len(cross_scores)):
-            hits[idx]['cross-score'] = cross_scores[idx]
-
-        # Output of top-3 hits from re-ranker
-        hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
-
-        score='cross-score'
-        df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in hits[0:int(top_k)]],columns=['Score','Text'])
-        df['Score'] = round(df['Score'],2)
-        df['Sentiment'] = df.Text.apply(gen_sentiment)
-
-        def gen_annotated_text(df):
-            '''Generate annotated text'''
-
-            tag_list=[]
-            for row in df.itertuples():
-                label = row[3]
-                text = row[2]
-                if label == 'Positive':
-                    tag_list.append((text,label,'#8fce00'))
-                elif label == 'Negative':
-                    tag_list.append((text,label,'#f44336'))
-                else:
-                    tag_list.append((text,label,'#000000'))
-
-            return tag_list
-
-        text_annotations = gen_annotated_text(df)
-
-        first, second = text_annotations[0], text_annotations[1]
-
-
-        with st.expander(label='Best Search Query Result', expanded=True):
-            annotated_text(first)
-
-        with st.expander(label='Alternative Search Query Result'):
-            annotated_text(second)
-
-    else:
-
-        st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
-
-else:
-
-    st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
+try:
+
+    if search_input:
+
+        if "sen_df" in st.session_state and "earnings_passages" in st.session_state:
+
+            ## Save to a dataframe for ease of visualization
+            sen_df = st.session_state['sen_df']
+
+            passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
+
+            with st.spinner(
+                text=f"Loading {sbert_model_name} encoder..."
+            ):
+                sbert = load_sbert(sbert_model_name)
+
+
+            ##### Semantic Search #####
+            # Encode the query using the bi-encoder and find potentially relevant passages
+            corpus_embeddings = sbert.encode(passages, convert_to_tensor=True, show_progress_bar=True)
+            question_embedding = sbert.encode(search_input, convert_to_tensor=True)
+            question_embedding = question_embedding.cpu()
+            hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k,score_function=util.dot_score)
+            hits = hits[0] # Get the hits for the first query
+
+            ##### Re-Ranking #####
+            # Now, score all retrieved passages with the cross_encoder
+            cross_inp = [[search_input, passages[hit['corpus_id']]] for hit in hits]
+            cross_scores = cross_encoder.predict(cross_inp)
+
+            # Sort results by the cross-encoder scores
+            for idx in range(len(cross_scores)):
+                hits[idx]['cross-score'] = cross_scores[idx]
+
+            # Output of top-3 hits from re-ranker
+            hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
+
+            score='cross-score'
+            df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in hits[0:int(top_k)]],columns=['Score','Text'])
+            df['Score'] = round(df['Score'],2)
+            df['Sentiment'] = df.Text.apply(gen_sentiment)
+
+            def gen_annotated_text(df):
+                '''Generate annotated text'''
+
+                tag_list=[]
+                for row in df.itertuples():
+                    label = row[3]
+                    text = row[2]
+                    if label == 'Positive':
+                        tag_list.append((text,label,'#8fce00'))
+                    elif label == 'Negative':
+                        tag_list.append((text,label,'#f44336'))
+                    else:
+                        tag_list.append((text,label,'#000000'))
+
+                return tag_list
+
+            text_annotations = gen_annotated_text(df)
+
+            first, second = text_annotations[0], text_annotations[1]
+
+
+            with st.expander(label='Best Search Query Result', expanded=True):
+                annotated_text(first)
+
+            with st.expander(label='Alternative Search Query Result'):
+                annotated_text(second)
+
+        else:
+
+            st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
+
+    else:
+
+        st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
+
+except RuntimeError:
+
+    st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
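The substance of the change is the new try/except RuntimeError wrapper: the whole search flow is indented under try, and any RuntimeError falls back to the same prompt shown when inputs are missing. A minimal sketch of that guard pattern, with a placeholder text input standing in for the page's real search widget:

import streamlit as st

# Hypothetical stand-ins for the page's real prompt and search box.
FALLBACK_MSG = ("Please ensure you have entered the YouTube URL "
                "or uploaded the Earnings Call file")
search_input = st.text_input("Search the earnings call transcript")

try:
    if search_input:
        if "sen_df" in st.session_state and "earnings_passages" in st.session_state:
            # The semantic search pipeline would run here.
            st.write(f"Running semantic search for: {search_input}")
        else:
            st.write(FALLBACK_MSG)
    else:
        st.write(FALLBACK_MSG)
except RuntimeError:
    # Instead of surfacing a traceback in the app, degrade to the same message.
    st.write(FALLBACK_MSG)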
 
 
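The search section the commit re-indents follows the standard sentence-transformers retrieve-then-re-rank recipe: a bi-encoder retrieves candidate passages, then a cross-encoder re-scores them. Below is a self-contained sketch of that recipe on a toy corpus; the model names are illustrative placeholders, not necessarily what load_sbert() and cross_encoder resolve to in this app:

from sentence_transformers import SentenceTransformer, CrossEncoder, util

passages = [
    "Revenue grew 12% year over year, driven by cloud subscriptions.",
    "Operating margin contracted on higher logistics costs.",
    "Management reiterated full-year guidance on the call.",
]
query = "What did management say about margins?"

# Stage 1: bi-encoder retrieval (fast, approximate).
bi_encoder = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True)
query_embedding = bi_encoder.encode(query, convert_to_tensor=True)
hits = util.semantic_search(query_embedding, corpus_embeddings,
                            top_k=3, score_function=util.dot_score)[0]

# Stage 2: cross-encoder re-ranking (slower, more precise).
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
cross_scores = cross_encoder.predict([[query, passages[h["corpus_id"]]] for h in hits])
for hit, score in zip(hits, cross_scores):
    hit["cross-score"] = float(score)

for hit in sorted(hits, key=lambda h: h["cross-score"], reverse=True):
    print(round(hit["cross-score"], 2), passages[hit["corpus_id"]])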
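gen_annotated_text() turns each result row into a (text, label, color) tuple for the annotated_text component, reading row[2] and row[3] positionally from itertuples(). A small sketch of the same mapping against named columns, which survives column reordering; the DataFrame is toy data and the helper name is hypothetical:

import pandas as pd

SENTIMENT_COLORS = {"Positive": "#8fce00", "Negative": "#f44336"}

def sentiment_tags(df):
    """Return (text, label, color) tuples keyed on named columns."""
    return [
        (row.Text, row.Sentiment, SENTIMENT_COLORS.get(row.Sentiment, "#000000"))
        for row in df.itertuples()
    ]

df = pd.DataFrame({
    "Text": ["Margins improved this quarter.", "Guidance was cut sharply."],
    "Sentiment": ["Positive", "Negative"],
})
print(sentiment_tags(df))
# [('Margins improved this quarter.', 'Positive', '#8fce00'),
#  ('Guidance was cut sharply.', 'Negative', '#f44336')]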