Spaces:

nickmuchi
/

Earnings-Call-Analysis-Whisperer

Running

App Files Files Community

nickmuchi committed on Oct 5, 2022

Commit

ee6d004

1 Parent(s): 754ce49

Update pages/3_Earnings_Semantic_Search_🔎_.py

Browse files

Files changed (1) hide show

pages/3_Earnings_Semantic_Search_🔎_.py +50 -44

pages/3_Earnings_Semantic_Search_🔎_.py CHANGED Viewed

@@ -18,49 +18,55 @@ if "sen_df" not in st.session_state:
 if "earnings_passages" not in st.session_state:
     st.session_state["earnings_passages"] = ''
-## Save to a dataframe for ease of visualization
-sen_df = st.session_state['sen_def']
-passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
-##### Semantic Search #####
-# Encode the query using the bi-encoder and find potentially relevant passages
-corpus_embeddings = sbert.encode(passages, convert_to_tensor=True, show_progress_bar=True)
-question_embedding = sbert.encode(search_input, convert_to_tensor=True)
-question_embedding = question_embedding.cpu()
-hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k,score_function=util.dot_score)
-hits = hits[0]  # Get the hits for the first query
-##### Re-Ranking #####
-# Now, score all retrieved passages with the cross_encoder
-cross_inp = [[query, passages[hit['corpus_id']]] for hit in hits]
-cross_scores = cross_encoder.predict(cross_inp)
-# Sort results by the cross-encoder scores
-for idx in range(len(cross_scores)):
-    hits[idx]['cross-score'] = cross_scores[idx]
-# Output of top-3 hits from re-ranker
-hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
-score='cross-score'
-df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in hits[0:2]],columns=['Score','Text'])
-df['Score'] = round(df['Score'],2)
-def gen_annotated_text(para):
-    tag_list = []
-    for i in sent_tokenize(para):
-        label = sen_df.loc[sen_df['text']==i, 'label'].values[0]
-        if label == 'Negative':
-            tag_list.append((i,label,'#faa'))
-        elif label == 'Positive':
-            tag_list.append((i,label,'#afa'))
-        else:
-            tag_list.append((i,label,'#fea'))
-    return tag_list
-text_to_annotate = [gen_annotated_text(para) for para in df.Text.tolist()]
-for i in text_to_annotate:
-    annotated_text(i)

 if "earnings_passages" not in st.session_state:
     st.session_state["earnings_passages"] = ''
+if st.session_state["sen_df"] or st.session_state["earnings_passages"]:  # NOTE(review): truth-testing a pandas DataFrame raises ValueError — confirm what type "sen_df" holds
+    ## Read the sentence-level dataframe from session state; it is used below to look up a label per sentence
+    sen_df = st.session_state['sen_def']  # NOTE(review): key is 'sen_def' here but "sen_df" in the guard above — looks like a typo; confirm
+    passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)  # window_size is defined outside this hunk
+    ##### Semantic Search #####
+    # Encode the passages and the query with the bi-encoder, then retrieve the top_k passages by dot-product score
+    corpus_embeddings = sbert.encode(passages, convert_to_tensor=True, show_progress_bar=True)
+    question_embedding = sbert.encode(search_input, convert_to_tensor=True)
+    question_embedding = question_embedding.cpu()  # move the query embedding to the CPU before searching
+    hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k,score_function=util.dot_score)
+    hits = hits[0]  # Only one query was searched, so take its list of hits
+    ##### Re-Ranking #####
+    # Score every retrieved (query, passage) pair with the cross_encoder
+    cross_inp = [[query, passages[hit['corpus_id']]] for hit in hits]  # NOTE(review): uses 'query' while the bi-encoder step uses 'search_input' — confirm 'query' is defined outside this hunk
+    cross_scores = cross_encoder.predict(cross_inp)
+    # Attach each cross-encoder score to its corresponding hit
+    for idx in range(len(cross_scores)):
+        hits[idx]['cross-score'] = cross_scores[idx]
+    # Sort by cross-encoder score, highest first; only the first two hits (hits[0:2]) are kept for display below
+    hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
+    score='cross-score'
+    df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in hits[0:2]],columns=['Score','Text'])
+    df['Score'] = round(df['Score'],2)  # round scores to 2 decimal places
+    def gen_annotated_text(para):  # returns a list of (sentence, label, colour) tuples for one passage
+        tag_list = []
+        for i in sent_tokenize(para):  # i is one sentence of the passage
+            label = sen_df.loc[sen_df['text']==i, 'label'].values[0]  # NOTE(review): requires an exact text match in sen_df; an empty match makes .values[0] raise IndexError
+            if label == 'Negative':
+                tag_list.append((i,label,'#faa'))  # light red
+            elif label == 'Positive':
+                tag_list.append((i,label,'#afa'))  # light green
+            else:
+                tag_list.append((i,label,'#fea'))  # light yellow for any other label
+        return tag_list
+    text_to_annotate = [gen_annotated_text(para) for para in df.Text.tolist()]
+    for i in text_to_annotate:
+        annotated_text(i)  # one annotated_text call per retrieved passage
+else:
+    st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')  # shown when both session-state values are falsy