Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from model.search.hybrid import HybridSearchClient | |
from model.data.notion_db import fetch_sakurap_corpus | |
def search(search_client: HybridSearchClient): | |
def _search(query: str) -> pd.DataFrame: | |
results = search_client.search_top_n(query) | |
result = results[0] | |
result["rank"] = result["rank"] + 1 | |
result = result[["rank", "title", "content", "rank_sparse", "rank_dense"]] | |
result.columns = ["rank", "title", "rap lyric", "rank: surface", "rank: vector"] | |
return result | |
return _search | |
if __name__ == "__main__": | |
# Load dataset | |
sakurap_df = fetch_sakurap_corpus("./data/sakurap_corpus.csv") | |
# Initialize search client | |
search_client = HybridSearchClient.from_dataframe(sakurap_df, "content") | |
with gr.Blocks() as search_interface: | |
gr.Markdown(""" | |
# π Cobalt | |
Demo app for hybrid search with vector and surface search using [Ruri](https://huggingface.co/cl-nagoya/ruri-large), [BM25](https://github.com/dorianbrown/rank_bm25) and [Voyager](https://spotify.github.io/voyager/). | |
""") | |
# Input query | |
search_query = gr.Textbox(label="Query", submit_btn=True) | |
gr.Markdown(""" | |
## Search Results | |
""") | |
# Search result | |
result_table = gr.DataFrame(label="Result", | |
column_widths=["5%", "20%", "65%", "5%", "5%"], | |
wrap=True, | |
datatype=["str", "str", "markdown", "str", "str"], | |
interactive=False) | |
# Event handler | |
search_query.submit(fn=search(search_client), inputs=search_query, outputs=result_table) | |
# App launch | |
search_interface.queue() | |
search_interface.launch() | |