Spaces:
Sleeping
Sleeping
File size: 1,474 Bytes
27bf2bb 444cbed 27bf2bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from typing import List, TypedDict
from BM25Retriever import BM25Retriever
# from CSCBM25Retriever import CSCBM25Retriever # Replace with your retriever module
class Hit(TypedDict):
    """A single search result, in the shape produced by ``search``."""

    # Identifier of the retrieved entry; also the key used to look up its text.
    cid: str
    # Relevance score the retriever assigned to this entry for the query.
    score: float
    # Stored text of the retrieved entry.
    text: str
# Directory holding the pre-built BM25 index served by this demo.
# NOTE: to try the CSC-based variant instead, import CSCBM25Retriever and
# point it at "output/csc_bm25_index".
INDEX_DIR = "output/bm25_index"

# Load the pre-built index once at import time; every query reuses this object.
retriever = BM25Retriever(index_dir=INDEX_DIR)
# Define the search function
def search(query: str) -> "List[Hit]":
    """
    Query the BM25 index and package the results for display.

    Args:
        query: Input query string.

    Returns:
        One ``Hit`` (``cid``, ``score``, ``text``) per retrieved entry, at
        most 10, in the order the retriever returns them. A blank query
        (empty, ``None`` or whitespace-only) yields an empty list without
        touching the index.
    """
    # Nothing to search for: skip the retriever instead of feeding it an
    # empty query.
    if not query or not query.strip():
        return []

    results = retriever.retrieve(query, topk=10)  # Retrieve top 10 results

    # Resolve the lookup tables once rather than on every hit.
    index = retriever.index
    doc_texts = index.doc_texts
    cid2docid = index.cid2docid

    return [
        {
            "cid": cid,
            # Coerce to a built-in float so the value matches ``Hit.score``
            # and serialises cleanly in the JSON output.
            "score": float(score),
            "text": doc_texts[cid2docid[cid]],
        }
        for cid, score in results.items()
    ]
# Build the Gradio UI: a single text box for the query, and a JSON panel
# that shows whatever ``search`` returns.
_query_box = gr.Textbox(label="Query")
_results_view = gr.JSON(label="Search Results")

demo = gr.Interface(
    fn=search,
    inputs=_query_box,
    outputs=_results_view,
    title="BM25 Search Engine Demo",
    description="Demo of a BM25-based search engine using a sparse index on the SciQ dataset.",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|