Commit 102df35
Sean-Case committed
1 parent: bc459f6

Adapted code to keep newly-loaded vectorstores within local user state

Files changed:
- app.py (+18, -17)
- chatfuncs/chatfuncs.py (+7, -7)
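In a Hugging Face Space, module-level variables are shared by every visitor, while gr.State values live in each user's browser session. This commit moves the vectorstore (and embeddings) into gr.State so that a document loaded by one user no longer replaces the default for everyone else. A minimal, self-contained sketch of the pattern follows; the helper names build_vectorstore and default_vectorstore are hypothetical stand-ins for the app's FAISS setup, not functions from this repo:

import gradio as gr

# Hypothetical stand-in for the app's FAISS embeddings/vectorstore setup.
def build_vectorstore(texts):
    return {"texts": texts}

default_vectorstore = build_vectorstore(["Lambeth 2030 plan text"])

def load_new_store(doc_text):
    # Returning the new store as a second output writes it into this
    # session's gr.State; other users keep the default store.
    return "Loaded.", build_vectorstore([doc_text])

with gr.Blocks() as demo:
    # Seeded from the module-level default; each session gets its own copy.
    vectorstore_state = gr.State(default_vectorstore)
    doc_in = gr.Textbox(label="Document text")
    status = gr.Textbox(label="Status")
    load_btn = gr.Button("Load")
    load_btn.click(load_new_store, inputs=[doc_in], outputs=[status, vectorstore_state])

demo.launch()

The key detail is that the loading handler returns the new store as an extra output targeted at vectorstore_state, which is exactly what docs_to_faiss_save does in the diff below.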
app.py
CHANGED
@@ -89,16 +89,28 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
     #print(out_message)
     #print(f"> Saved to: {save_to}")
 
-    return out_message
+    return out_message, vectorstore_func
 
 # Gradio chat
 
 import gradio as gr
 
+
+
+
 block = gr.Blocks(css=".gradio-container {background-color: black}")
 
 with block:
-
+    ingest_text = gr.State()
+    ingest_metadata = gr.State()
+    ingest_docs = gr.State()
+
+    embeddings_state = gr.State(globals()["embeddings"])
+    vectorstore_state = gr.State(globals()["vectorstore"])
+
+    chat_history_state = gr.State()
+    instruction_prompt_out = gr.State()
+
     gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
 
     gr.Markdown("Chat with a document (alpha). By default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page (feature temporarily disabled), please select below. The chatbot will not answer questions where answers can't be found on the website. If switching topic, please click the 'New topic' button as the bot will assume follow-up questions are linked to the first. Sources are shown underneath the chat area.\n\nWarnings: This is a public app. Please ensure that the document you upload is not sensitive in any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
@@ -117,7 +129,6 @@ with block:
         lines=1,
     )
 
-
     submit = gr.Button(value="Send message", variant="secondary", scale = 1)
 
    examples_set = gr.Examples(label="Examples for the Lambeth Borough Plan",
@@ -151,42 +162,32 @@ with block:
         "<center>Powered by Flan Alpaca and Langchain</a></center>"
     )
 
-    ingest_text = gr.State()
-    ingest_metadata = gr.State()
-    ingest_docs = gr.State()
-
-    embeddings_state = gr.State()
-    vectorstore_state = gr.State()
-
-    chat_history_state = gr.State()
-    instruction_prompt_out = gr.State()
-
     #def hide_examples():
     #    return gr.Examples.update(visible=False)
 
     # Load in a pdf
     load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
         then(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\
-        then(docs_to_faiss_save, inputs=[ingest_docs], outputs=ingest_embed_out) # #then(load_embeddings, outputs=[embeddings_state]).\
+        then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state]) # #then(load_embeddings, outputs=[embeddings_state]).\
         #then(hide_examples)
 
     # Load in a webpage
     load_web_click = load_web.click(ing.parse_html, inputs=[in_web, in_div], outputs=[ingest_text, ingest_metadata, current_source]).\
         then(ing.html_text_to_docs, inputs=[ingest_text, ingest_metadata], outputs=[ingest_docs]).\
-        then(docs_to_faiss_save, inputs=[ingest_docs], outputs=ingest_embed_out)
+        then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state])
         #then(hide_examples)
 
     # Load in a webpage
 
     # Click/enter to send message action
-    response_click = submit.click(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
+    response_click = submit.click(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
         then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
         then(chatf.produce_streaming_answer_chatbot_hf, inputs=[chatbot, instruction_prompt_out], outputs=chatbot)
     response_click.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
         then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
         then(lambda: gr.update(interactive=True), None, [message], queue=False)
 
-    response_enter = message.submit(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
+    response_enter = message.submit(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
        then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
        then(chatf.produce_streaming_answer_chatbot_hf, [chatbot, instruction_prompt_out], chatbot)
    response_enter.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
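On the consuming side, listing vectorstore_state and embeddings_state among an event's inputs hands the handler this session's objects as ordinary positional arguments; nothing is read from module globals. A stripped-down sketch of that wiring, with placeholder retrieval logic rather than the app's actual chat functions:

import gradio as gr

def answer(message, history, vectorstore, embeddings):
    # vectorstore/embeddings arrive as plain Python objects taken from this
    # session's gr.State inputs (the retrieval here is a placeholder).
    n_docs = len(vectorstore["texts"])
    history = (history or []) + [(message, f"Searched {n_docs} passages.")]
    return history, ""

with gr.Blocks() as demo:
    vectorstore_state = gr.State({"texts": ["example passage"]})
    embeddings_state = gr.State(None)
    chatbot = gr.Chatbot()
    message = gr.Textbox()
    # State components sit in `inputs`/`outputs` exactly like visible ones.
    message.submit(answer,
                   inputs=[message, chatbot, vectorstore_state, embeddings_state],
                   outputs=[chatbot, message])

demo.launch()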
chatfuncs/chatfuncs.py
CHANGED
@@ -238,11 +238,11 @@ def create_doc_df(docs_keep_out):
 
     return doc_df
 
-def hybrid_retrieval(new_question_kworded, k_val, out_passages,
+def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_passages,
                      vec_score_cut_off, vec_weight, bm25_weight, svm_weight): # ,vectorstore, embeddings
 
-    vectorstore=globals()["vectorstore"]
-    embeddings=globals()["embeddings"]
+    #vectorstore=globals()["vectorstore"]
+    #embeddings=globals()["embeddings"]
 
 
     docs = vectorstore.similarity_search_with_score(new_question_kworded, k=k_val)
@@ -470,7 +470,7 @@ def get_expanded_passages(vectorstore, docs, width):
 
     return expanded_docs, doc_df
 
-def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory): # ,
+def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,
 
     question = inputs["question"]
     chat_history = inputs["chat_history"]
@@ -485,7 +485,7 @@ def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory): # ,
     #docs_keep_as_doc, docs_content, docs_url = find_relevant_passages(new_question_kworded, k_val = 5, out_passages = 3,
     #                                           vec_score_cut_off = 1.3, vec_weight = 1, tfidf_weight = 0.5, svm_weight = 1)
 
-    docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, k_val = 5, out_passages = 2,
+    docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 5, out_passages = 2,
                                                vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
                                                #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])
 
@@ -523,7 +523,7 @@ def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory): # ,
 
     return instruction_prompt_out, sources_docs_content_string, new_question_kworded
 
-def get_history_sources_final_input_prompt(user_input, history, extracted_memory):#):
+def get_history_sources_final_input_prompt(user_input, history, extracted_memory, vectorstore, embeddings):#):
 
     #if chain_agent is None:
     #    history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
@@ -539,7 +539,7 @@ def get_history_sources_final_input_prompt(user_input, history, extracted_memory):#):
     instruction_prompt, content_prompt = create_prompt_templates()
     instruction_prompt_out, docs_content_string, new_question_kworded =\
         create_final_prompt({"question": user_input, "chat_history": history}, #vectorstore,
-                            instruction_prompt, content_prompt, extracted_memory)
+                            instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings)
 
 
     history.append(user_input)
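The chatfuncs.py half of the change is plain dependency injection: each function in the call chain now accepts vectorstore and embeddings as parameters rather than fetching them with globals(), so retrieval always runs against the session's own store. Roughly, with the signatures heavily simplified for illustration:

# Before: a module-global lookup, shared by every user of the Space.
def hybrid_retrieval_old(query, k_val):
    vectorstore = globals()["vectorstore"]
    return vectorstore.similarity_search_with_score(query, k=k_val)

# After: the caller threads the session's objects down the whole chain,
# so each function sees only the store belonging to that user.
def hybrid_retrieval(query, vectorstore, embeddings, k_val):
    return vectorstore.similarity_search_with_score(query, k=k_val)

def create_final_prompt(inputs, vectorstore, embeddings):
    return hybrid_retrieval(inputs["question"], vectorstore, embeddings, k_val=5)

def get_history_sources_final_input_prompt(user_input, history, vectorstore, embeddings):
    return create_final_prompt({"question": user_input, "chat_history": history},
                               vectorstore, embeddings)

A side benefit of this style is testability: hybrid_retrieval can now be exercised with a stub vectorstore instead of patching module globals.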