Spaces:
Runtime error
Runtime error
import logging | |
import datasets | |
from findkit import indexes | |
import gradio as gr | |
# Configure the root logger once at import time so INFO-level messages are shown.
logging.basicConfig(level=logging.INFO)
def get_html_retrieval_results(retrieval_result, show_only_one_match_per_episode):
    """Render retrieval results as an HTML table.

    Args:
        retrieval_result: Table of retrieved rows (used as a pandas
            DataFrame: ``drop_duplicates`` and ``to_html`` are called on it).
        show_only_one_match_per_episode: When truthy, rows are de-duplicated
            on the ``"episode"`` column before rendering.

    Returns:
        The HTML table as a string (links rendered, index column omitted),
        or an empty string when there are no rows to show.
    """
    rows = retrieval_result
    if show_only_one_match_per_episode:
        rows = rows.drop_duplicates(subset=["episode"])

    # Nothing to display: return an empty string instead of a header-only table.
    if len(rows) == 0:
        return ""

    return rows.to_html(render_links=True, index=False)
def get_retrieval_results(findkit_index, query, n_retrieved_results):
    """Query the index and label the score column as a BM25 score.

    Args:
        findkit_index: Object exposing ``find_similar(query, n)``.
        query: Query passed through to ``find_similar``.
        n_retrieved_results: Number of results requested from the index.

    Returns:
        The table returned by ``find_similar`` with its ``"distance"``
        column renamed to ``"bm25_score"``.
        NOTE(review): assumes ``find_similar`` returns a pandas DataFrame
        with a ``"distance"`` column -- confirm against findkit.
    """
    retrieval_results_df = findkit_index.find_similar(query, n_retrieved_results)
    # ``rename`` with a bare mapping targets the index labels; the mapping must
    # be passed as ``columns=`` for the column to actually be renamed.
    return retrieval_results_df.rename(columns={"distance": "bm25_score"})
def setup_df():
    """Load the podcast dataset's ``train`` split as a DataFrame without missing values.

    Returns:
        The ``"train"`` split of ``lambdaofgod/lex_fridman_podcast`` converted
        with ``to_pandas()``, with rows containing missing values dropped.
    """
    train_split = datasets.load_dataset("lambdaofgod/lex_fridman_podcast")["train"]
    return train_split.to_pandas().dropna()
def setup_index():
    """Build the in-memory BM25 index over the podcast data.

    Returns:
        The result of ``indexes.InMemoryBM25Index.build`` called with the
        ``"text"`` column and the full DataFrame from ``setup_df()``.
    """
    corpus_df = setup_df()
    text_column = corpus_df["text"]
    return indexes.InMemoryBM25Index.build(text_column, corpus_df)
# Build the index once at import time; every request handler call reuses it.
findkit_index = setup_index()
def show_retrieval_results(query, n_retrieved_results, show_only_one_match_per_episode):
    """Run a query against the module-level index and return the results as HTML.

    Args:
        query: Query forwarded to ``get_retrieval_results``.
        n_retrieved_results: Number of results to request.
        show_only_one_match_per_episode: Forwarded to
            ``get_html_retrieval_results`` to control de-duplication.

    Returns:
        Whatever ``get_html_retrieval_results`` returns for the retrieved rows.
    """
    matches = get_retrieval_results(findkit_index, query, n_retrieved_results)
    return get_html_retrieval_results(matches, show_only_one_match_per_episode)
# --- Gradio UI wiring -------------------------------------------------------
# Input components; they are passed to the interface in the same order as the
# parameters of ``show_retrieval_results``.
show_only_one_match_per_episode = gr.Checkbox(
    label="show only one match per episode",
    value=False,
)
n_retrieved_results = gr.Number(
    label="number of results",
    value=10,
    precision=0,
)
query = gr.Textbox(
    label="input query",
    value="artificial life",
)

# The handler's return value is displayed through an "html" output.
demo = gr.Interface(
    fn=show_retrieval_results,
    inputs=[
        query,
        n_retrieved_results,
        show_only_one_match_per_episode,
    ],
    outputs="html",
)

demo.launch()