Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,8 +6,7 @@ from rank_bm25 import BM25Okapi
|
|
| 6 |
from sklearn.feature_extraction import _stop_words
|
| 7 |
import string
|
| 8 |
import numpy as np
|
| 9 |
-
|
| 10 |
-
import time
|
| 11 |
from newspaper import Article
|
| 12 |
import base64
|
| 13 |
import docx2txt
|
|
@@ -168,9 +167,16 @@ def bm25_api(passages):
|
|
| 168 |
|
| 169 |
bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1"]
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
# This function will search all wikipedia articles for passages that
|
| 172 |
# answer the query
|
| 173 |
-
def search_func(query, top_k=
|
| 174 |
st.write(f"Search Query: {query}")
|
| 175 |
|
| 176 |
if url_text:
|
|
@@ -188,8 +194,9 @@ def search_func(query, top_k=2):
|
|
| 188 |
bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)
|
| 189 |
|
| 190 |
st.subheader(f"Top-{top_k} lexical search (BM25) hits")
|
| 191 |
-
|
| 192 |
-
|
|
|
|
| 193 |
|
| 194 |
##### Semantic Search #####
|
| 195 |
# Encode the query using the bi-encoder and find potentially relevant passages
|
|
@@ -211,15 +218,17 @@ def search_func(query, top_k=2):
|
|
| 211 |
st.markdown("\n-------------------------\n")
|
| 212 |
st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
|
| 213 |
hits = sorted(hits, key=lambda x: x['score'], reverse=True)
|
| 214 |
-
|
| 215 |
-
|
|
|
|
| 216 |
|
| 217 |
# Output of top-k hits from re-ranker
|
| 218 |
st.markdown("\n-------------------------\n")
|
| 219 |
st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
|
| 220 |
hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
|
| 221 |
-
|
| 222 |
-
|
|
|
|
| 223 |
|
| 224 |
#Streamlit App
|
| 225 |
|
|
|
|
| 6 |
from sklearn.feature_extraction import _stop_words
|
| 7 |
import string
|
| 8 |
import numpy as np
|
| 9 |
+
import pandas as pd
|
|
|
|
| 10 |
from newspaper import Article
|
| 11 |
import base64
|
| 12 |
import docx2txt
|
|
|
|
| 167 |
|
| 168 |
bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1"]
|
| 169 |
|
| 170 |
+
def display_df_as_table(model, top_k, score):
    """Build a two-column table of scores and passage texts for the leading hits.

    Args:
        model: Sequence of hit dicts (despite the name, this is a list of
            search hits, not a model). Each hit must contain a
            ``'corpus_id'`` key and the key named by ``score``.
        top_k: Maximum number of hits, taken from the front of ``model``,
            to include in the table.
        score: Name of the key holding the score to display; callers in
            this file pass ``'score'`` or ``'cross-score'``.

    Returns:
        A ``pandas.DataFrame`` with columns ``'Score'`` (rounded to two
        decimals) and ``'Text'`` (the passage for each hit's
        ``'corpus_id'``), in the same order as the input hits.
    """
    # NOTE: ``passages`` is not a parameter; it is resolved from module scope
    # at call time, so it must be populated before this function is called.
    rows = [(hit[score], passages[hit['corpus_id']]) for hit in model[:top_k]]
    df = pd.DataFrame(rows, columns=['Score', 'Text'])

    # With no hits the column is created with object dtype; cast to float so
    # the rounding step always operates on a numeric column.
    df['Score'] = df['Score'].astype(float).round(2)

    return df
|
| 176 |
+
|
| 177 |
# This function will search all wikipedia articles for passages that
|
| 178 |
# answer the query
|
| 179 |
+
def search_func(query, top_k=top_k):
|
| 180 |
st.write(f"Search Query: {query}")
|
| 181 |
|
| 182 |
if url_text:
|
|
|
|
| 194 |
bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)
|
| 195 |
|
| 196 |
st.subheader(f"Top-{top_k} lexical search (BM25) hits")
|
| 197 |
+
|
| 198 |
+
bm25_df = display_df_as_table(bm25_hits,top_k,'score')
|
| 199 |
+
st.write(bm25_df.to_html(index=False), unsafe_allow_html=True)
|
| 200 |
|
| 201 |
##### Semantic Search #####
|
| 202 |
# Encode the query using the bi-encoder and find potentially relevant passages
|
|
|
|
| 218 |
st.markdown("\n-------------------------\n")
|
| 219 |
st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
|
| 220 |
hits = sorted(hits, key=lambda x: x['score'], reverse=True)
|
| 221 |
+
|
| 222 |
+
cross_df = display_df_as_table(hits,top_k,'score')
|
| 223 |
+
st.write(cross_df.to_html(index=False), unsafe_allow_html=True)
|
| 224 |
|
| 225 |
# Output of top-k hits from re-ranker
|
| 226 |
st.markdown("\n-------------------------\n")
|
| 227 |
st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
|
| 228 |
hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
|
| 229 |
+
|
| 230 |
+
rerank_df = display_df_as_table(hits,top_k,'cross-score')
|
| 231 |
+
st.write(rerank_df.to_html(index=False), unsafe_allow_html=True)
|
| 232 |
|
| 233 |
#Streamlit App
|
| 234 |
|