lambdaofgod's picture
gradio app setup
426db28
raw
history blame
1.75 kB
import logging
import datasets
from findkit import indexes
import gradio as gr
# Configure the root logger once at import time so INFO-level records
# (and above) are emitted.
logging.basicConfig(level="INFO")
def get_html_retrieval_results(retrieval_result, show_only_one_match_per_episode):
    """Render retrieval results as an HTML table.

    Args:
        retrieval_result: DataFrame of retrieved rows. It must contain an
            ``episode`` column when ``show_only_one_match_per_episode`` is
            truthy.
        show_only_one_match_per_episode: When truthy, keep only the first
            row for each distinct ``episode`` value.

    Returns:
        The HTML table markup (links rendered as anchors, index column
        omitted), or an empty string when there are no rows to show.
    """
    rows = retrieval_result
    if show_only_one_match_per_episode:
        rows = rows.drop_duplicates(subset=["episode"])

    # Nothing to display: return empty markup instead of a header-only table.
    if len(rows) == 0:
        return ""

    return rows.to_html(render_links=True, index=False)
def get_retrieval_results(findkit_index, query, n_retrieved_results):
    """Query the index and relabel the score column for display.

    Args:
        findkit_index: Object exposing ``find_similar(query, n)`` that
            returns a pandas DataFrame of matches.
        query: Free-text search query.
        n_retrieved_results: Number of results to request from the index.

    Returns:
        A new DataFrame with the ``distance`` column renamed to
        ``bm25_score``; the frame returned by the index is not modified.
    """
    retrieval_results_df = findkit_index.find_similar(query, n_retrieved_results)
    # A positional mapping passed to DataFrame.rename() targets the *index*
    # labels, which silently left the "distance" column untouched. Use the
    # ``columns=`` keyword so the column itself is renamed.
    return retrieval_results_df.rename(columns={"distance": "bm25_score"})
def setup_df():
    """Load the podcast dataset's ``train`` split as a pandas DataFrame.

    Returns:
        The split converted to a DataFrame, with every row that contains a
        missing value dropped.
    """
    train_split = datasets.load_dataset("lambdaofgod/lex_fridman_podcast")["train"]
    return train_split.to_pandas().dropna()
def setup_index():
    """Build an in-memory BM25 index over the podcast ``text`` column.

    Returns:
        The index produced by ``indexes.InMemoryBM25Index.build``.
    """
    podcast_df = setup_df()
    text_column = podcast_df["text"]
    # The full frame is passed alongside the text; presumably it is the
    # per-document data returned with each match -- confirm against findkit.
    return indexes.InMemoryBM25Index.build(text_column, podcast_df)
# Build the index once at import time; show_retrieval_results reuses this
# module-level instance for every query instead of rebuilding it.
findkit_index = setup_index()
def show_retrieval_results(query, n_retrieved_results, show_only_one_match_per_episode):
    """Run a query against the module-level index and return HTML results.

    Args:
        query: Free-text search query.
        n_retrieved_results: Number of results to request from the index.
        show_only_one_match_per_episode: When truthy, keep at most one row
            per episode in the rendered table.

    Returns:
        HTML markup for the results table, or an empty string when there
        are no matches.
    """
    matches = get_retrieval_results(findkit_index, query, n_retrieved_results)
    return get_html_retrieval_results(matches, show_only_one_match_per_episode)
# --- Gradio UI wiring -------------------------------------------------------
# Input widgets. They are listed in `inputs` below in the same order as the
# parameters of show_retrieval_results (query, n_retrieved_results,
# show_only_one_match_per_episode).

# Checkbox toggling per-episode de-duplication; unchecked by default.
show_only_one_match_per_episode = gr.Checkbox(
label="show only one match per episode", value=False
)
# Number of results to request; defaults to 10.
# NOTE(review): precision=0 presumably makes Gradio deliver an integer value
# -- confirm against the Gradio Number documentation.
n_retrieved_results = gr.Number(label="number of results", value=10, precision=0)
# Free-text query box, pre-filled with an example query.
query = gr.Textbox(label="input query", value="artificial life")
# The interface calls show_retrieval_results with the widget values and
# displays the returned string through the "html" output component.
demo = gr.Interface(
fn=show_retrieval_results,
inputs=[query, n_retrieved_results, show_only_one_match_per_episode],
outputs="html",
)
# Start the app when this file is executed (runs at module import as well,
# since the call is not guarded).
demo.launch()