Build BM25 index from documents and save to output directory
Browse files
app.py
CHANGED
@@ -363,17 +363,16 @@ class Hit(TypedDict):
|
|
363 |
|
364 |
demo: Optional[gr.Interface] = None # Assign your gradio demo to this variable
|
365 |
return_type = List[Hit]
|
|
|
|
|
366 |
bm25_index = BM25Index.build_from_documents(
|
367 |
documents=iter(sciq.corpus),
|
368 |
ndocs=12160,
|
369 |
show_progress_bar=True,
|
370 |
- k1=0.9,
|
371 |
- b=0.4,
|
372 |
)
|
373 |
bm25_index.save("output/bm25_index")
|
374 |
bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
|
375 |
|
376 |
- ## YOUR_CODE_STARTS_HERE
|
377 |
def retrieve(query: str, topk: int = 10) -> return_type:
|
378 |
ranking = bm25_retriever.retrieve(query=query, topk=topk)
|
379 |
hits = []
|
|
|
363 |
|
364 |
demo: Optional[gr.Interface] = None # Assign your gradio demo to this variable
|
365 |
return_type = List[Hit]
|
366 |
+
|
367 |
+ ## YOUR_CODE_STARTS_HERE
|
368 |
bm25_index = BM25Index.build_from_documents(
|
369 |
documents=iter(sciq.corpus),
|
370 |
ndocs=12160,
|
371 |
show_progress_bar=True,
|
|
|
|
|
372 |
)
|
373 |
bm25_index.save("output/bm25_index")
|
374 |
bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
|
375 |
|
|
|
376 |
def retrieve(query: str, topk: int = 10) -> return_type:
|
377 |
ranking = bm25_retriever.retrieve(query=query, topk=topk)
|
378 |
hits = []
|