import streamlit as st from pyserini.search.lucene import LuceneSearcher import json import time st.set_page_config(page_title="Pyserini x Datasets", page_icon='🌸', layout="centered") searcher = LuceneSearcher('index') cola, colb, colc = st.columns([5,4,5]) with colb: st.image("logo.jpeg") col1, col2 = st.columns([9, 1]) with col1: search_query = st.text_input(label="", placeholder="Search") with col2: st.write('#') button_clicked = st.button("🔎") if search_query or button_clicked: num_results = None #search_results = searcher.search(myquery, limit=num_results) t_0 = time.time() search_results = searcher.search(search_query, k=100_000) search_time = time.time() - t_0 #st.write(dir(search_results[0])) st.write(f'
Retrieved {len(search_results):,.0f} documents in {search_time*1000:.2f} ms
', unsafe_allow_html=True) for result in search_results[:10]: #keywords = ', '.join(result.key_terms('text')) #meta = result['meta'] #st.write(f"Document Keywords: {keywords}", unsafe_allow_html=True) result = json.loads(result.raw) doc = result["contents"] result_id = result["id"] try: st.write(doc[:1000], unsafe_allow_html=True) st.write(f'