jerpint committed on
Commit
e9698e9
•
0 Parent(s):

First commit

Browse files
Files changed (5) hide show
  1. Procfile +1 -0
  2. cfg.py +133 -0
  3. gradio_app.py +115 -0
  4. requirements.txt +2 -0
  5. setup.sh +2 -0
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: source setup.sh && python gradio_app.py
cfg.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+
4
+ from huggingface_hub import hf_hub_download
5
+
6
+ from buster.busterbot import Buster, BusterConfig
7
+ from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
8
+ from buster.formatters.documents import DocumentsFormatter
9
+ from buster.formatters.prompts import PromptFormatter
10
+ from buster.retriever import Retriever, SQLiteRetriever
11
+ from buster.tokenizers import GPTTokenizer
12
+ from buster.validators import QuestionAnswerValidator, Validator
13
+
14
+
15
# Module-level logger; INFO level so the download message below is emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Where the documents database lives on the Hugging Face Hub.
# HUB_TOKEN is read from the environment and is None when unset.
REPO_ID = "jerpint/towardsai-buster-data"
HUB_DB_FILE = "documents.db"
HUB_TOKEN = os.getenv("HUB_TOKEN")

# Fetch the database into the current directory at import time; the
# retriever configuration below points at "./documents.db".
logger.info(f"Downloading {HUB_DB_FILE} from hub...")
hf_hub_download(
    repo_id=REPO_ID, repo_type="dataset", filename=HUB_DB_FILE, token=HUB_TOKEN, local_dir="."
)
30
+
31
+
32
# Configuration for every Buster component built further down in this module
# (validator, retriever, document answerer, completer, tokenizer, formatters).
#
# NOTE: the prompt fragments in `text_before_docs` / `text_after_docs` are
# adjacent string literals, which Python concatenates with NO separator.
# Each fragment therefore carries its own trailing space or newline.
buster_cfg = BusterConfig(
    validator_cfg={
        "unknown_response_templates": [
            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
        ],
        "unknown_threshold": 0.85,
        "embedding_model": "text-embedding-ada-002",
        "use_reranking": True,
        "invalid_question_response": "This question does not seem relevant to my current knowledge.",
        "check_question_prompt": """You are an chatbot answering questions on towardsAI, an artificial intelligence blogs.

Users will be asking questions about the blog.
Your job is to determine wether or not a question is a valid question to ask, and should be answered.
More general questions are not considered valid, even if you might know the response.
A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.

For example:

Q: How can I setup my own chatbot?
true

Q: What is the meaning of life?
false

A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
        # Question validation is a one-shot true/false answer, so no streaming.
        "completion_kwargs": {
            "model": "gpt-3.5-turbo",
            "stream": False,
            "temperature": 0,
        },
    },
    retriever_cfg={
        # Same file that is downloaded from the hub at the top of this module.
        "db_path": "./documents.db",
        "top_k": 3,
        "thresh": 0.7,
        "max_tokens": 2000,
        "embedding_model": "text-embedding-ada-002",
    },
    documents_answerer_cfg={
        "no_documents_message": "No blog posts are available for this question.",
    },
    completion_cfg={
        # Answers are streamed; the Gradio `chat` handler consumes them token by token.
        "completion_kwargs": {
            "model": "gpt-3.5-turbo",
            "stream": True,
            "temperature": 0,
        },
    },
    tokenizer_cfg={
        "model_name": "gpt-3.5-turbo",
    },
    documents_formatter_cfg={
        "max_tokens": 3500,
        "formatter": "{content}",
    },
    prompt_formatter_cfg={
        "max_tokens": 3500,
        "text_before_docs": (
            "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI). "
            "If the answer is in the documentation, summarize it in a helpful way to the user. "
            "If it isn't, simply reply that you cannot answer the question. "
            "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
            "Here is the documentation: "
            "<DOCUMENTS> "
        ),
        "text_after_docs": (
            # Proper closing tag; the previous "<\DOCUMENTS>" relied on an
            # invalid "\D" escape sequence and did not close <DOCUMENTS>.
            "</DOCUMENTS>\n"
            "REMEMBER:\n"
            "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI).\n"
            "Here are the rules you must follow:\n"
            "1) You must only respond with information contained in the documentation above. Say you do not know if the information is not provided.\n"
            "2) Make sure to format your answers in Markdown format, including code block and snippets.\n"
            "3) Do not reference any links, urls or hyperlinks in your answers.\n"
            "4) Do not refer to the documentation directly, but use the instructions provided within it to answer questions.\n"
            "5) If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with:\n"
            "'I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?'\n"
            "For example:\n"
            "What is the meaning of life for a qa bot?\n"
            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?\n"
            "Now answer the following question:\n"
        ),
    },
)
115
+
116
# Build the Buster pipeline from `buster_cfg`: a retriever, a tokenizer shared
# by both formatters, the document answerer, and the validator.
retriever: Retriever = SQLiteRetriever(**buster_cfg.retriever_cfg)
tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)

# The answerer's parts are constructed in the same order as before
# (completer, documents formatter, prompt formatter), then assembled.
_completer = ChatGPTCompleter(**buster_cfg.completion_cfg)
_documents_formatter = DocumentsFormatter(
    tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
)
_prompt_formatter = PromptFormatter(
    tokenizer=tokenizer, **buster_cfg.prompt_formatter_cfg
)
document_answerer: DocumentAnswerer = DocumentAnswerer(
    completer=_completer,
    documents_formatter=_documents_formatter,
    prompt_formatter=_prompt_formatter,
    **buster_cfg.documents_answerer_cfg,
)

validator: Validator = QuestionAnswerValidator(**buster_cfg.validator_cfg)

# The object imported by the Gradio app (`from cfg import buster`).
buster: Buster = Buster(
    retriever=retriever,
    document_answerer=document_answerer,
    validator=validator,
)
gradio_app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import cfg
4
+ import gradio as gr
5
+ import pandas as pd
6
+ from cfg import buster
7
+
8
+
9
+ import logging
10
+
11
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Credentials for the login prompt, read once at import time.
# Either value is None when its environment variable is unset.
USERNAME = os.getenv("BUSTER_USERNAME")
PASSWORD = os.getenv("BUSTER_PASSWORD")


def check_auth(username: str, password: str) -> bool:
    """Return True only if the submitted credentials match the configured ones.

    Used as the ``auth`` callable passed to ``block.launch``.

    Args:
        username: Username submitted on the login form.
        password: Password submitted on the login form.

    Returns:
        True when both values match ``USERNAME`` and ``PASSWORD``. Always
        False when either configured credential is unset or either
        submitted value is not a string.
    """
    # Local import keeps this block self-contained.
    import hmac

    configured = USERNAME is not None and PASSWORD is not None
    well_formed = isinstance(username, str) and isinstance(password, str)

    if configured and well_formed:
        # compare_digest runs in constant time, so response timing does not
        # leak how much of a guess was correct. Comparing UTF-8 bytes keeps
        # non-ASCII credentials working (str inputs must be ASCII-only).
        valid_user = hmac.compare_digest(
            username.encode("utf-8"), USERNAME.encode("utf-8")
        )
        valid_password = hmac.compare_digest(
            password.encode("utf-8"), PASSWORD.encode("utf-8")
        )
        is_auth = valid_user and valid_password
    else:
        # Fail closed: without configured credentials nobody can log in.
        is_auth = False

    logger.info(f"Log-in attempted by {username=}. {is_auth=}")
    return is_auth
24
+
25
+
26
def format_sources(matched_documents: pd.DataFrame) -> str:
    """Render the matched documents as a Markdown list of sources.

    Args:
        matched_documents: One row per source. The ``title``, ``url`` and
            ``similarity_to_answer`` columns are read; similarity is shown
            as a percentage (value * 100).

    Returns:
        The formatted message, or an empty string when there are no rows.
        The input DataFrame is left unmodified.
    """
    if len(matched_documents) == 0:
        return ""

    documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
    document_template: str = "[🔗 {document.title}]({document.url}), relevance: {document.similarity_to_answer:2.1f} %"

    # Scale on a copy: writing the percentage back into the caller's frame
    # would multiply it by 100 again on every subsequent call.
    scaled_documents = matched_documents.assign(
        similarity_to_answer=matched_documents.similarity_to_answer * 100
    )
    documents = "\n".join(
        document_template.format(document=document)
        for _, document in scaled_documents.iterrows()
    )
    footnote: str = "I'm a bot 🤖 and not always perfect."

    return documents_answer_template.format(documents=documents, footnote=footnote)
45
+
46
+
47
def add_sources(history, completion):
    """Append the formatted sources to the chat when the answer is relevant.

    When ``completion.answer_relevant`` is truthy, a new bot-only turn
    ``[None, <sources>]`` is appended to ``history`` in place. The same
    ``history`` list is returned in every case.
    """
    if not completion.answer_relevant:
        return history

    sources_message = format_sources(completion.matched_documents)
    history.append([None, sources_message])
    return history
53
+
54
+
55
def user(user_input, history):
    """Show the user's question in the chat right away.

    Returns an empty string (which clears the textbox) and a new history
    list with an unanswered ``[user_input, None]`` turn appended.
    """
    pending_turn = [user_input, None]
    updated_history = history + [pending_turn]
    return "", updated_history
58
+
59
+
60
def chat(history):
    """Stream Buster's answer into the last turn of the chat history.

    Reads the question from the last turn, asks ``buster`` to process it,
    then appends each token from ``completion.answer_generator`` to that
    turn's answer. Yields ``(history, completion)`` after every token so
    the UI can refresh as the answer grows.
    """
    latest_turn = history[-1]
    completion = buster.process_input(latest_turn[0])

    # Start from an empty answer so tokens can be appended as they arrive.
    latest_turn[1] = ""
    for token in completion.answer_generator:
        latest_turn[1] += token
        yield history, completion
71
+
72
+
73
# Gradio UI: a chat window, a question box with a send button, and examples.
# NOTE(review): the CSS targets "#chatbot", but no component below sets
# elem_id="chatbot" -- confirm the selector still matches.
block = gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}")

with block:
    with gr.Row():
        gr.Markdown(
            "<h3><center>Buster 🤖: A Question-Answering Bot for your documentation</center></h3>"
        )

    chatbot = gr.Chatbot()

    with gr.Row():
        question = gr.Textbox(
            label="What's your question?",
            placeholder="Ask a question to AI stackoverflow here...",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    examples = gr.Examples(
        examples=[
            "How can I perform backpropagation?",
            "How do I deal with noisy data?",
            "How do I deal with noisy data in 2 words?",
        ],
        inputs=question,
    )

    gr.Markdown(
        "This application uses GPT to search the docs for relevant info and answer questions."
    )

    # Holds the completion produced by `chat` so `add_sources` can read it.
    response = gr.State()

    # Clicking "Send" and pressing Enter in the textbox run the same chain:
    # 1) `user` echoes the question and clears the textbox (not queued),
    # 2) `chat` streams the answer, 3) `add_sources` appends the sources.
    for trigger in (submit.click, question.submit):
        trigger(user, [question, chatbot], [question, chatbot], queue=False).then(
            chat, inputs=[chatbot], outputs=[chatbot, response]
        ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])


block.queue(concurrency_count=16)
block.launch(debug=True, share=False, auth=check_auth)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
git+https://github.com/jerpint/buster@v1.0.14
# gradio_app.py uses Button.style() and queue(concurrency_count=...),
# which were removed in Gradio 4, so stay on the 3.x series.
gradio<4
setup.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Sourced by the Procfile before the app starts.
# Listen on all interfaces rather than only on localhost.
export GRADIO_SERVER_NAME=0.0.0.0
# Use the port supplied in $PORT; fall back to Gradio's default (7860) so an
# unset PORT does not export an empty, unparsable value.
export GRADIO_SERVER_PORT="${PORT:-7860}"