File size: 3,704 Bytes
d60948f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1423dd4
d60948f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a72052
 
 
 
d60948f
e21035f
d60948f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5666436
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import cfg
import gradio as gr
import pandas as pd
from cfg import setup_buster

# Build the question-answering pipeline once at import time from the
# module-level config; reused by every chat() call below.
buster = setup_buster(cfg.buster_cfg)


def format_sources(matched_documents: pd.DataFrame) -> str:
    """Render the retrieved documents as a markdown "sources" message.

    Args:
        matched_documents: DataFrame with at least ``url`` and
            ``similarity_to_answer`` columns (similarity expected in [0, 1]).

    Returns:
        A markdown string linking each source page with its relevance as a
        percentage, or "" when no documents matched.
    """
    if matched_documents.empty:
        return ""

    # Work on a copy: the original implementation scaled the similarity
    # column in place on the caller's DataFrame, so formatting the same
    # frame twice would have reported 10,000x percentages.
    matched_documents = matched_documents.copy()

    # Express similarity as a percentage for display.
    matched_documents["similarity_to_answer"] = (
        matched_documents["similarity_to_answer"] * 100
    )

    # print the page instead of the heading, more meaningful for hf docs
    # (vectorized: last path segment of each url)
    matched_documents["page"] = matched_documents["url"].str.split("/").str[-1]

    documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
    document_template: str = "[🔗 {document.page}]({document.url}), relevance: {document.similarity_to_answer:2.1f} %"

    documents = "\n".join(
        document_template.format(document=document)
        for _, document in matched_documents.iterrows()
    )
    footnote: str = "I'm a bot 🤖 and not always perfect."

    return documents_answer_template.format(documents=documents, footnote=footnote)


def add_sources(history, completion):
    """Append the formatted source list to the chat when the answer is relevant.

    Mutates and returns *history* (list of [user, bot] message pairs).
    """
    if not completion.answer_relevant:
        return history

    history.append([None, format_sources(completion.matched_documents)])
    return history


def user(user_input, history):
    """Adds user's question immediately to the chat."""
    updated_history = history + [[user_input, None]]
    # Return "" as the first value to clear the question textbox.
    return "", updated_history


def chat(history):
    """Stream the bot's answer token-by-token into the last chat turn.

    Yields (history, completion) after every token so the UI updates live.
    """
    question = history[-1][0]

    completion = buster.process_input(question)
    print(completion)

    # Start the bot side of the last turn empty, then grow it per token.
    history[-1][1] = ""
    for token in completion.answer_generator:
        history[-1][1] = history[-1][1] + token
        yield history, completion


# ---- Gradio UI wiring -----------------------------------------------------
block = gr.Blocks()

with block:
    gr.Markdown(
        """<h1><center>Buster 🤖: A Question-Answering Bot for your documentation</center></h1>"""
    )
    gr.Markdown(
        """
    ## Welcome to Buster!
    This chatbot is designed to answer any questions related to the [huggingface transformers](https://huggingface.co/docs/transformers/index) library.
    It uses ChatGPT + embeddings to search the docs for relevant sections and uses them to answer questions. It can then cite its sources back to you to verify the information.
    Note that LLMs are prone to hallucination, so all outputs should always be vetted by users.

    #### The Code is open-sourced and available on [Github](https://www.github.com/jerpint/buster)
    """
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        with gr.Column(scale=4):
            question = gr.Textbox(
                label="What's your question?",
                placeholder="Ask a question to AI stackoverflow here...",
                lines=1,
            )
        submit = gr.Button(value="Send", variant="secondary")

    # Clickable sample questions that pre-fill the textbox.
    examples = gr.Examples(
        examples=[
            "What kind of models should I use for images and text?",
            # fixed typo: "form scratch" -> "from scratch"
            "When should I finetune a model vs. training it from scratch?",
            "Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?",
        ],
        inputs=question,
    )

    # fixed: removed a stray invisible variation-selector (U+FE0F) that
    # preceded "<center>" in the original HTML string
    gr.HTML("<center> Created with ❤️ by @jerpint and @hadrienbertrand.")

    # Holds the latest Completion so add_sources() can read it after chat().
    response = gr.State()

    # Pipeline for both the Send button and pressing Enter in the textbox:
    # 1) user(): echo the question and clear the input (not queued),
    # 2) chat(): stream the answer tokens into the chat,
    # 3) add_sources(): append the cited sources if the answer was relevant.
    submit.click(user, [question, chatbot], [question, chatbot], queue=False).then(
        chat, inputs=[chatbot], outputs=[chatbot, response]
    ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
    question.submit(user, [question, chatbot], [question, chatbot], queue=False).then(
        chat, inputs=[chatbot], outputs=[chatbot, response]
    ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])


block.launch()