import logging
import os

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download

from buster.apps.bot_configs import available_configs
from buster.busterbot import Buster, BusterConfig
from buster.retriever import Retriever
from buster.utils import get_retriever_from_extension

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
DEFAULT_CONFIG = "huggingface"

# DOWNLOAD FROM HF HUB
HUB_TOKEN = os.getenv("HUB_TOKEN")
REPO_ID = "jerpint/buster-data"
HUB_DB_FILE = "documents.db"
logger.info(f"Downloading {HUB_DB_FILE} from hub...") | |
hf_hub_download( | |
repo_id=REPO_ID, | |
repo_type="dataset", | |
filename=HUB_DB_FILE, | |
token=HUB_TOKEN, | |
local_dir=".", | |
) | |
logger.info(f"Downloaded.") | |
retriever: Retriever = get_retriever_from_extension(HUB_DB_FILE)(HUB_DB_FILE)

# initialize buster with the default config...
default_cfg: BusterConfig = available_configs.get(DEFAULT_CONFIG)
buster = Buster(cfg=default_cfg, retriever=retriever)
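# Note: chat() swaps this config at query time, based on the documentation source selected in the UI.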


def format_sources(matched_documents: pd.DataFrame) -> str:
    """Format the retrieved documents as a list of linked sources with their relevance scores."""
    if len(matched_documents) == 0:
        return ""

    sourced_answer_template: str = (
        """📝 Here are the sources I used to answer your question:<br>"""
        """{sources}<br><br>"""
        """{footnote}"""
    )
    source_template: str = """[🔗 {source.title}]({source.url}), relevance: {source.similarity:2.1f} %"""

    # Similarity scores come back in [0, 1]; display them as percentages.
    matched_documents.similarity = matched_documents.similarity * 100

    sources = "<br>".join([source_template.format(source=source) for _, source in matched_documents.iterrows()])
    footnote: str = "I'm a bot 🤖 and not always perfect."

    return sourced_answer_template.format(sources=sources, footnote=footnote)


def chat(question, history, bot_source):
    history = history or []

    # Swap the bot config to match the documentation source selected in the dropdown.
    cfg = available_configs.get(bot_source)
    buster.update_cfg(cfg)

    response = buster.process_input(question)

    # Append the formatted sources to the completion before displaying it.
    matched_documents = response.matched_documents
    formatted_sources = format_sources(matched_documents)
    formatted_response = f"{response.completion.text}<br><br>" + formatted_sources

    history.append((question, formatted_response))
    return history, history
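

# Gradio UI: a documentation-source dropdown, a chatbot window, and a textbox with a "Send" button.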
block = gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}")

with block:
    with gr.Row():
        gr.Markdown("<h3><center>Buster 🤖: A Question-Answering Bot for open-source libraries </center></h3>")

    doc_source = gr.Dropdown(
        choices=sorted(list(available_configs.keys())),
        value=DEFAULT_CONFIG,
        interactive=True,
        multiselect=False,
        label="Source of Documentation",
        info="The source of documentation to select from",
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="What kind of model should I use for sentiment analysis?",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    examples = gr.Examples(
        # TODO: seems not possible (for now) to update examples on change...
        examples=[
            "What kind of models should I use for images and text?",
            "When should I finetune a model vs. training it from scratch?",
            "Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?",
        ],
        inputs=message,
    )

    gr.Markdown(
        """This simple application uses GPT to search the huggingface 🤗 transformers docs and answer questions.
        For more info on huggingface transformers, view the [full documentation](https://huggingface.co/docs/transformers/index)."""
    )

    gr.HTML("<center> Created with ❤️ by @jerpint and @hadrienbertrand")

    state = gr.State()
    agent_state = gr.State()

    submit.click(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])
    message.submit(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])

block.launch(debug=True)
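
# A minimal sketch (not part of the app) of how the same pipeline could be exercised without the
# Gradio UI, reusing the buster object and format_sources() defined above; the question string is
# just an illustrative input:
#
#     response = buster.process_input("How do I load a pretrained model?")
#     print(response.completion.text)
#     print(format_sources(response.matched_documents))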