jerpint committed on
Commit
e0e448c
•
1 Parent(s): fc1544a

support multiple sources (#10)

Browse files

* support multiple sources

* start app with all sources enabled

Files changed (3) hide show
  1. cfg.py +2 -1
  2. gradio_app.py +11 -5
  3. requirements.txt +1 -1
cfg.py CHANGED
@@ -21,7 +21,6 @@ PASSWORD = os.getenv("BUSTER_PASSWORD")
21
 
22
  HUB_TOKEN = os.getenv("HUB_TOKEN")
23
  REPO_ID = os.getenv("HF_DATASET")
24
- # HUB_DB_FILE = "deeplake_store.zip"
25
 
26
  DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "wiki_tai_langchain")
27
  ZIP_FILE = DEEPLAKE_DATASET + ".zip"
@@ -79,6 +78,8 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
79
  "thresh": 0.7,
80
  "max_tokens": 2000,
81
  "embedding_model": "text-embedding-ada-002",
 
 
82
  },
83
  documents_answerer_cfg={
84
  "no_documents_message": "No blog posts are available for this question.",
 
21
 
22
  HUB_TOKEN = os.getenv("HUB_TOKEN")
23
  REPO_ID = os.getenv("HF_DATASET")
 
24
 
25
  DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "wiki_tai_langchain")
26
  ZIP_FILE = DEEPLAKE_DATASET + ".zip"
 
78
  "thresh": 0.7,
79
  "max_tokens": 2000,
80
  "embedding_model": "text-embedding-ada-002",
81
+ "exec_option": "compute_engine",
82
+ "use_tql": True,
83
  },
84
  documents_answerer_cfg={
85
  "no_documents_message": "No blog posts are available for this question.",
gradio_app.py CHANGED
@@ -1,5 +1,6 @@
1
  import logging
2
  import os
 
3
 
4
  import gradio as gr
5
  import pandas as pd
@@ -15,6 +16,7 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
15
  logger = logging.getLogger(__name__)
16
  logging.basicConfig(level=logging.INFO)
17
 
 
18
 
19
  def check_auth(username: str, password: str) -> bool:
20
  valid_user = username == cfg.USERNAME
@@ -40,7 +42,7 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
40
  "similarity_to_answer", ascending=False
41
  ).drop_duplicates("title", keep="first")
42
 
43
- documents = "\n\n".join(
44
  [
45
  document_template.format(document=document)
46
  for _, document in matched_documents.iterrows()
@@ -64,10 +66,10 @@ def user(user_input, history):
64
  return "", history + [[user_input, None]]
65
 
66
 
67
- def chat(history):
68
  user_input = history[-1][0]
69
 
70
- completion = buster.process_input(user_input)
71
 
72
  history[-1][1] = ""
73
 
@@ -85,6 +87,10 @@ with block:
85
  "<h3><center>Buster πŸ€–: A Question-Answering Bot for your documentation</center></h3>"
86
  )
87
 
 
 
 
 
88
  chatbot = gr.Chatbot()
89
 
90
  with gr.Row():
@@ -107,10 +113,10 @@ with block:
107
  response = gr.State()
108
 
109
  submit.click(user, [question, chatbot], [question, chatbot], queue=False).then(
110
- chat, inputs=[chatbot], outputs=[chatbot, response]
111
  ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
112
  question.submit(user, [question, chatbot], [question, chatbot], queue=False).then(
113
- chat, inputs=[chatbot], outputs=[chatbot, response]
114
  ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
115
 
116
 
 
1
  import logging
2
  import os
3
+ from typing import Optional
4
 
5
  import gradio as gr
6
  import pandas as pd
 
16
  logger = logging.getLogger(__name__)
17
  logging.basicConfig(level=logging.INFO)
18
 
19
+ AVAILABLE_SOURCES = ["towardsai", "wikipedia", "langchain_course"]
20
 
21
  def check_auth(username: str, password: str) -> bool:
22
  valid_user = username == cfg.USERNAME
 
42
  "similarity_to_answer", ascending=False
43
  ).drop_duplicates("title", keep="first")
44
 
45
+ documents = "\n".join(
46
  [
47
  document_template.format(document=document)
48
  for _, document in matched_documents.iterrows()
 
66
  return "", history + [[user_input, None]]
67
 
68
 
69
+ def get_answer(history, sources: Optional[list[str]] = None):
70
  user_input = history[-1][0]
71
 
72
+ completion = buster.process_input(user_input, sources=sources)
73
 
74
  history[-1][1] = ""
75
 
 
87
  "<h3><center>Buster πŸ€–: A Question-Answering Bot for your documentation</center></h3>"
88
  )
89
 
90
+ source_selection = gr.CheckboxGroup(
91
+ choices=AVAILABLE_SOURCES, label="Select Sources", value=AVAILABLE_SOURCES
92
+ )
93
+
94
  chatbot = gr.Chatbot()
95
 
96
  with gr.Row():
 
113
  response = gr.State()
114
 
115
  submit.click(user, [question, chatbot], [question, chatbot], queue=False).then(
116
+ get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, response]
117
  ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
118
  question.submit(user, [question, chatbot], [question, chatbot], queue=False).then(
119
+ get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, response]
120
  ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
121
 
122
 
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- buster-doctalk==1.0.19
2
  gradio
3
  deeplake
 
1
+ git+https://github.com/jerpint/buster@multiple-sources
2
  gradio
3
  deeplake