Spaces:
Runtime error
Runtime error
File size: 4,057 Bytes
444aa3f 2a13c73 177af2d c8a1687 444aa3f 2a13c73 c8a1687 c6dd20e 44ee439 06bca0c ab096a6 c8a1687 444aa3f c6dd20e 444aa3f c6dd20e 06bca0c c8a1687 c6dd20e 444aa3f c6dd20e c8a1687 444aa3f c8a1687 c6dd20e c8a1687 c6dd20e c8a1687 c6dd20e c8a1687 c6dd20e c8a1687 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import logging
import os
import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download
from buster.apps.bot_configs import available_configs
from buster.busterbot import Buster, BusterConfig
from buster.retriever import Retriever
from buster.utils import get_retriever_from_extension
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Key into `available_configs` used to build the initial bot; the same value
# is used as the default selection of the documentation dropdown in the UI.
DEFAULT_CONFIG = "huggingface"

# DOWNLOAD FROM HF HUB
# HUB_TOKEN is None when the environment variable is not set.
HUB_TOKEN = os.getenv("HUB_TOKEN")
REPO_ID = "jerpint/buster-data"
HUB_DB_FILE = "documents.db"

# Lazy %-style arguments: the message is only formatted if the record is emitted.
logger.info("Downloading %s from hub...", HUB_DB_FILE)
# The file is requested into the current directory because it is opened by its
# bare name right below.
hf_hub_download(
    repo_id=REPO_ID,
    repo_type="dataset",
    filename=HUB_DB_FILE,
    token=HUB_TOKEN,
    local_dir=".",
)
logger.info("Downloaded.")

# `get_retriever_from_extension` returns a callable for the given file name;
# that callable is then invoked with the same file name to build the retriever.
retriever: Retriever = get_retriever_from_extension(HUB_DB_FILE)(HUB_DB_FILE)

# initialize buster with the default config...
# NOTE(review): `.get` yields None if DEFAULT_CONFIG is missing from
# `available_configs` — confirm Buster is never constructed with cfg=None.
default_cfg: BusterConfig = available_configs.get(DEFAULT_CONFIG)
buster = Buster(cfg=default_cfg, retriever=retriever)
def format_sources(matched_documents: pd.DataFrame) -> str:
    """Render the matched documents as a "sources" section for the chat answer.

    Args:
        matched_documents: One row per matched document. The templates below
            read the ``title``, ``url`` and ``similarity`` columns;
            ``similarity`` is multiplied by 100 before being displayed as a
            percentage. The DataFrame is not modified.

    Returns:
        The sources joined with ``<br>`` and followed by a footnote, or an
        empty string when ``matched_documents`` has no rows.
    """
    # `len(...)` rather than truthiness: `bool(DataFrame)` raises ValueError.
    if len(matched_documents) == 0:
        return ""

    # NOTE(review): the leading glyphs in these strings look like mis-encoded
    # emoji — confirm against the upstream file before changing them.
    sourced_answer_template: str = (
        """π Here are the sources I used to answer your question:<br>"""
        """{sources}<br><br>"""
        """{footnote}"""
    )
    source_template: str = """[π {source.title}]({source.url}), relevance: {source.similarity:2.1f} %"""

    # Scale on a copy. Assigning the scaled column back onto the argument
    # would alter the caller's DataFrame, and the values would be multiplied
    # by 100 again if the same frame were formatted a second time.
    documents = matched_documents.assign(similarity=matched_documents.similarity * 100)

    sources = "<br>".join(source_template.format(source=row) for _, row in documents.iterrows())
    footnote: str = "I'm a bot π€ and not always perfect."

    return sourced_answer_template.format(sources=sources, footnote=footnote)
def chat(question, history, bot_source):
    """Answer one question and append the exchange to the chat history.

    Args:
        question: The text of the user's question.
        history: List of ``(question, answer)`` tuples from earlier turns;
            any falsy value (e.g. ``None``) starts a fresh list.
        bot_source: Key used to look up the bot configuration in
            ``available_configs``.

    Returns:
        The updated history twice, one value per output wired to this handler.
    """
    if not history:
        history = []

    # Point the shared bot at the configuration selected for this request.
    buster.update_cfg(available_configs.get(bot_source))
    response = buster.process_input(question)

    sources_section = format_sources(response.matched_documents)
    answer = f"{response.completion.text}<br><br>" + sources_section

    history.append((question, answer))
    return history, history
# Gradio UI definition. Components are created in display order inside the
# `with block:` context, then the event handlers are attached.
# NOTE(review): the CSS selector targets `#chatbot`, but no component below
# sets elem_id="chatbot" — confirm the rule still matches.
block = gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}")

with block:
    # Page title.
    with gr.Row():
        gr.Markdown("<h3><center>Buster π€: A Question-Answering Bot for open-source libraries </center></h3>")

    # Selects which entry of `available_configs` is passed to `chat` as `bot_source`.
    doc_source = gr.Dropdown(
        choices=sorted(list(available_configs.keys())),
        value=DEFAULT_CONFIG,
        interactive=True,
        multiselect=False,
        label="Source of Documentation",
        info="The source of documentation to select from",
    )

    # Displays the history returned by `chat`.
    chatbot = gr.Chatbot()

    # Question input and send button.
    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="What kind of model should I use for sentiment analysis?",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    # Example questions that fill the `message` textbox.
    examples = gr.Examples(
        # TODO: seems not possible (for now) to update examples on change...
        examples=[
            "What kind of models should I use for images and text?",
            "When should I finetune a model vs. training it form scratch?",
            "Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?",
        ],
        inputs=message,
    )

    gr.Markdown(
        """This simple application uses GPT to search the huggingface π€ transformers docs and answer questions.
For more info on huggingface transformers view the [full documentation.](https://huggingface.co/docs/transformers/index)."""
    )

    gr.HTML("οΈ<center> Created with β€οΈ by @jerpint and @hadrienbertrand")

    # `state` carries the chat history between calls: it is passed to `chat`
    # as `history` and overwritten with the second value `chat` returns.
    state = gr.State()
    # NOTE(review): `agent_state` is not passed to either handler below.
    agent_state = gr.State()

    # The button click and the textbox submit event both call the same handler
    # with the same inputs and outputs.
    submit.click(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])
    message.submit(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])


# Start the app when the module is executed.
block.launch(debug=True)
|