pyserini-demo / app.py
cakiki's picture
Update app.py
c103890
import json
import logging
import time

import streamlit as st
from pyserini.search.lucene import LuceneSearcher
# Streamlit front end for a Pyserini (Lucene) index: shows a logo and a search
# box, runs the query against the local index, and renders the top hits.

# Upper bound on hits requested from the index. Only the first DISPLAYED_HITS
# are rendered; the rest are fetched so the summary line can report a count.
# NOTE(review): presumably chosen to approximate "all matches" — confirm.
MAX_HITS = 100_000
# Number of hits actually rendered on the page.
DISPLAYED_HITS = 10
# Maximum number of characters of each document's contents that is displayed.
EXCERPT_CHARS = 1000

logger = logging.getLogger(__name__)


def format_summary(hit_count: int, elapsed_seconds: float) -> str:
    """Return the right-aligned HTML summary line shown above the results.

    Args:
        hit_count: Number of documents returned by the search.
        elapsed_seconds: Search duration in seconds; displayed in milliseconds.

    Returns:
        An HTML ``<p>`` snippet with a thousands-separated count and the
        duration rounded to two decimals.
    """
    return (
        f'<p align="right" style="color:grey;">'
        f'Retrieved {hit_count:,} documents in {elapsed_seconds * 1000:.2f} ms</p>'
    )


def parse_hit(raw: str) -> tuple:
    """Decode the raw JSON of a stored document.

    Args:
        raw: JSON text of one document; must contain the keys ``"contents"``
            and ``"id"``.

    Returns:
        A ``(contents, doc_id)`` tuple with the values of those two keys.

    Raises:
        json.JSONDecodeError: If ``raw`` is not valid JSON.
        KeyError: If either key is missing.
    """
    record = json.loads(raw)
    return record["contents"], record["id"]


def render_hit(hit) -> None:
    """Write one search hit (excerpt, document id, separator) to the page.

    Args:
        hit: A search result exposing its stored JSON document as ``hit.raw``.
    """
    contents, doc_id = parse_hit(hit.raw)
    try:
        # NOTE(review): document text is emitted as raw HTML. If the indexed
        # content is not trusted this allows markup injection; left unchanged
        # because documents may rely on embedded HTML — confirm.
        st.write(contents[:EXCERPT_CHARS], unsafe_allow_html=True)
        st.write(f'<div align="right"><b>Document ID</b>: {doc_id}</div>', unsafe_allow_html=True)
    except Exception:
        # Rendering stays best-effort (one bad document must not break the
        # page), but the failure is logged instead of silently discarded.
        # ``Exception`` rather than a bare ``except`` so that BaseException
        # subclasses such as KeyboardInterrupt/SystemExit still propagate.
        logger.exception("Failed to render document %s", doc_id)
    st.write('---')


def main() -> None:
    """Build the page and, when a query was submitted, show the results."""
    st.set_page_config(page_title="Pyserini x Datasets", page_icon='🌸', layout="centered")
    searcher = LuceneSearcher('index')

    # Three columns; only the middle one is used, which centres the logo.
    _, logo_col, _ = st.columns([5, 4, 5])
    with logo_col:
        st.image("logo.jpeg")

    query_col, button_col = st.columns([9, 1])
    with query_col:
        search_query = st.text_input(label="", placeholder="Search")
    with button_col:
        # Presumably a spacer so the button lines up with the text box.
        st.write('#')
        button_clicked = st.button("🔎")

    if not (search_query or button_clicked):
        return

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()
    search_results = searcher.search(search_query, k=MAX_HITS)
    elapsed = time.perf_counter() - started

    st.write(format_summary(len(search_results), elapsed), unsafe_allow_html=True)
    for hit in search_results[:DISPLAYED_HITS]:
        render_hit(hit)


# Streamlit is expected to run the script as ``__main__``; the guard keeps the
# helpers above importable without starting the UI.
if __name__ == "__main__":
    main()