File size: 4,057 Bytes
444aa3f
2a13c73
177af2d
c8a1687
444aa3f
2a13c73
c8a1687
c6dd20e
44ee439
06bca0c
ab096a6
c8a1687
444aa3f
 
 
c6dd20e
 
444aa3f
 
 
 
 
 
 
 
 
 
 
 
 
 
c6dd20e
 
 
06bca0c
c8a1687
c6dd20e
444aa3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6dd20e
 
 
 
c8a1687
444aa3f
 
 
 
 
 
 
 
 
c8a1687
 
 
 
c6dd20e
c8a1687
 
 
c6dd20e
 
 
 
 
 
 
 
 
 
c8a1687
 
 
 
 
 
 
 
 
 
 
c6dd20e
 
c8a1687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6dd20e
 
c8a1687
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import logging
import os

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download

from buster.apps.bot_configs import available_configs
from buster.busterbot import Buster, BusterConfig
from buster.retriever import Retriever
from buster.utils import get_retriever_from_extension

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Key into `available_configs` used to build the initial bot; the same value
# preselects the documentation dropdown in the UI.
DEFAULT_CONFIG = "huggingface"

# Download the documents database from the Hugging Face Hub.
# HUB_TOKEN comes from the environment and is None when the variable is unset.
HUB_TOKEN = os.getenv("HUB_TOKEN")
REPO_ID = "jerpint/buster-data"
HUB_DB_FILE = "documents.db"
logger.info("Downloading %s from hub...", HUB_DB_FILE)
hf_hub_download(
    repo_id=REPO_ID,
    repo_type="dataset",
    filename=HUB_DB_FILE,
    token=HUB_TOKEN,
    local_dir=".",
)
logger.info("Downloaded.")
# `get_retriever_from_extension` returns a callable for the given file name,
# which is then invoked on that same file to build the retriever.
retriever: Retriever = get_retriever_from_extension(HUB_DB_FILE)(HUB_DB_FILE)

# Initialize buster with the default config; `chat` swaps the config per request.
# NOTE(review): `.get` yields None if DEFAULT_CONFIG is missing from
# `available_configs` - confirm Buster rejects a None config.
default_cfg: BusterConfig = available_configs.get(DEFAULT_CONFIG)
buster = Buster(cfg=default_cfg, retriever=retriever)


def format_sources(matched_documents: pd.DataFrame) -> str:
    """Render the matched documents as a list of markdown links with relevance.

    Args:
        matched_documents: One row per source. The `title`, `url` and
            `similarity` columns are read; `similarity` is multiplied by 100
            for display (presumably a 0-1 score - confirm with the retriever).
            The frame is not modified.

    Returns:
        A string with one `<br>`-separated line per source, wrapped in an
        intro sentence and a footnote, or "" when there are no rows.
    """
    if len(matched_documents) == 0:
        return ""

    sourced_answer_template: str = (
        "📝 Here are the sources I used to answer your question:<br>{sources}<br><br>{footnote}"
    )
    source_template: str = "[🔗 {source.title}]({source.url}), relevance: {source.similarity:2.1f} %"

    # Scale on a copy: assigning back to `matched_documents.similarity` would
    # rewrite the caller's frame, inflating the scores by another factor of
    # 100 every time the same frame is formatted again.
    as_percent = matched_documents.assign(similarity=matched_documents.similarity * 100)
    sources = "<br>".join(source_template.format(source=source) for _, source in as_percent.iterrows())
    footnote: str = "I'm a bot 🤖 and not always perfect."

    return sourced_answer_template.format(sources=sources, footnote=footnote)


def chat(question, history, bot_source):
    """Answer `question` with the shared bot and record the exchange.

    The module-level `buster` is switched to the config registered under
    `bot_source` before the question is processed. The reply shown to the
    user is the completion text followed by the formatted sources.

    Returns the updated history twice: the click/submit handlers route one
    copy to the chatbot display and the other to the session state.
    """
    buster.update_cfg(available_configs.get(bot_source))
    response = buster.process_input(question)

    sources_block = format_sources(response.matched_documents)
    reply = f"{response.completion.text}<br><br>" + sources_block

    # The state is empty on the first turn, so start a fresh conversation.
    history = history or []
    history.append((question, reply))
    return history, history


# Gradio UI: a documentation-source dropdown, a chat window, a question box
# with a send button, and canned example questions.
# NOTE(review): the CSS rule targets `#chatbot`, but the Chatbot below is
# created without an explicit elem_id - confirm the rule actually applies.
block = gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}")

with block:
    with gr.Row():
        gr.Markdown("<h3><center>Buster 🤖: A Question-Answering Bot for open-source libraries </center></h3>")

    # Selects which entry of `available_configs` the bot uses for a question.
    doc_source = gr.Dropdown(
        choices=sorted(available_configs.keys()),
        value=DEFAULT_CONFIG,
        interactive=True,
        multiselect=False,
        label="Source of Documentation",
        info="The source of documentation to select from",
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="What kind of model should I use for sentiment analysis?",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    examples = gr.Examples(
        # TODO: seems not possible (for now) to update examples on change...
        examples=[
            "What kind of models should I use for images and text?",
            "When should I finetune a model vs. training it from scratch?",
            "Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?",
        ],
        inputs=message,
    )

    gr.Markdown(
        """This simple application uses GPT to search the huggingface 🤗 transformers docs and answer questions.
    For more info on huggingface transformers view the [full documentation](https://huggingface.co/docs/transformers/index)."""
    )

    gr.HTML("<center> Created with ❤️ by @jerpint and @hadrienbertrand</center>")

    # `state` carries the conversation history between calls to `chat`.
    # `agent_state` is not wired to any event in this file.
    state = gr.State()
    agent_state = gr.State()

    # Both the button and pressing Enter in the textbox run the same handler.
    submit.click(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])
    message.submit(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])


block.launch(debug=True)