Sean-Case committed
Commit f6036ad • Parent: 26e86cf

Tidied up intro text

Files changed:
- app.py  +3 -3
- chatfuncs/chatfuncs.py  +104 -5
app.py
CHANGED
@@ -111,7 +111,7 @@ with block:
 
     gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
 
-    gr.Markdown("Chat with a document (alpha). This is a small model, that can only answer specific questions that are answered in the text. It cannot give overall impressions of or summarise the document. By default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page
+    gr.Markdown("Chat with a document (alpha). This is a small model, that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. By default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nWarnings: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
 
     current_source = gr.Textbox(label="Current data source that is loaded into the app", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf")
 
@@ -137,8 +137,8 @@ with block:
                 "What are the 2030 outcomes for Lambeth?"])
 
         with gr.Row():
-            current_topic = gr.Textbox(label="Keywords related to current conversation topic. If you want to talk about something else, press 'New topic'", placeholder="Keywords related to the conversation topic will appear here")
-            clear = gr.Button(value="
+            current_topic = gr.Textbox(label="Note: Feature currently disabled - Keywords related to current conversation topic. If you want to talk about something else, press 'New topic'", placeholder="Keywords related to the conversation topic will appear here")
+            clear = gr.Button(value="Clear chat", variant="secondary", scale=0)
 
 
     with gr.Tab("Load in a different PDF file or web page to chat"):
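The new 'Clear chat' button added above needs a click handler elsewhere in app.py to actually reset the conversation; that wiring is not part of this diff. As a rough, assumed illustration of the usual Gradio pattern (the component names and handler below are hypothetical, not taken from the app):

import gradio as gr

with gr.Blocks() as block:
    gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
    chatbot = gr.Chatbot()  # conversation history display
    with gr.Row():
        current_topic = gr.Textbox(label="Keywords related to current conversation topic")
        clear = gr.Button(value="Clear chat", variant="secondary", scale=0)

    # Clicking "Clear chat" empties the chatbot history and the topic keywords box.
    clear.click(lambda: (None, ""), inputs=None, outputs=[chatbot, current_topic])

block.launch()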
chatfuncs/chatfuncs.py
CHANGED
@@ -46,7 +46,7 @@ import gradio as gr
 
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Running on device:", torch_device)
-threads = torch.get_num_threads()
+threads = 8#torch.get_num_threads()
 print("CPU threads:", threads)
 
 PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
@@ -73,7 +73,7 @@ threads: int = threads
 batch_size:int = 512
 context_length:int = 2048
 gpu_layers:int = 0
-sample =
+sample = True
 
 ## Highlight text constants
 hlt_chunk_size = 20
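The hunk above pins the CPU thread count to 8 rather than querying torch, and switches sampling on. For context, torch.get_num_threads() reports the intra-op thread count PyTorch detected, which on constrained shared hosts can exceed what the container is actually allowed to use; that is presumably the motivation for hard-coding a value. A trivial sketch of the comparison:

import torch

detected_threads = torch.get_num_threads()  # value the old code relied on
threads = 8                                 # fixed cap now hard-coded in the diff above
print("Detected threads:", detected_threads, "| Using:", threads)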
@@ -91,9 +91,16 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL
 ctrans_llm = [] # Not leaded by default
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/orca_mini_3B-GGML', model_type='llama', model_file='orca-mini-3b.ggmlv3.q4_0.bin')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/orca_mini_3B-GGML', model_type='llama', model_file='orca-mini-3b.ggmlv3.q8_0.bin')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf')
+
 #gpt4all_model = GPT4All(model_name= "orca-mini-3b.ggmlv3.q4_0.bin", model_path="models/") # "ggml-mpt-7b-chat.bin"
 
 # Huggingface chat model
+#hf_checkpoint = 'jphme/phi-1_5_Wizard_Vicuna_uncensored'
 hf_checkpoint = 'declare-lab/flan-alpaca-large'
 
 def create_hf_model(model_name):
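The commented-out lines added above are alternative GGUF checkpoints that could be swapped in through ctransformers instead of the default Hugging Face model. A rough sketch of enabling one of them, reusing the module-level constants from earlier in the file; the exact keyword arguments accepted depend on the installed ctransformers version, so treat this as illustrative rather than the app's own loading code:

from ctransformers import AutoModelForCausalLM

# Illustrative only: load one of the GGUF checkpoints listed above, passing the
# module-level constants (threads, context_length, gpu_layers) as config values.
ctrans_llm = AutoModelForCausalLM.from_pretrained(
    'TheBloke/Mistral-7B-OpenOrca-GGUF',
    model_type='mistral',
    model_file='mistral-7b-openorca.Q4_K_M.gguf',
    threads=8,
    context_length=2048,
    gpu_layers=0,
)

# The ctransformers model object is callable for simple generation.
print(ctrans_llm("Hello, how are you?", max_new_tokens=32))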
@@ -115,7 +122,7 @@ def create_hf_model(model_name):
     elif "mpt" in model_name:
         model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
     else:
-        model = AutoModelForCausalLM.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 2048)
 
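The fallback branch of create_hf_model now passes trust_remote_code=True, which lets transformers run custom modelling code bundled with a checkpoint (useful for models like the commented-out phi-1.5 variant above, which shipped its own modelling code at the time). A minimal, assumed illustration of the same loader shape, not the app's full function:

from transformers import AutoModelForCausalLM, AutoTokenizer

def create_hf_model_sketch(model_name):
    # trust_remote_code=True executes modelling code distributed with the
    # checkpoint, so only enable it for repositories you trust.
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=2048)
    return model, tokenizer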
@@ -397,7 +404,94 @@ def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_p
 
     return docs_keep_as_doc, doc_df, docs_keep_out
 
+
 def get_expanded_passages(vectorstore, docs, width):
+
+    """
+    Extracts expanded passages based on given documents and a width for context.
+
+    Parameters:
+    - vectorstore: The primary data source.
+    - docs: List of documents to be expanded.
+    - width: Number of documents to expand around a given document for context.
+
+    Returns:
+    - expanded_docs: List of expanded Document objects.
+    - doc_df: DataFrame representation of expanded_docs.
+    """
+
+    from collections import defaultdict
+
+    def get_docs_from_vstore(vectorstore):
+        vector = vectorstore.docstore._dict
+        return list(vector.items())
+
+    def extract_details(docs_list):
+        docs_list_out = [tup[1] for tup in docs_list]
+        content = [doc.page_content for doc in docs_list_out]
+        meta = [doc.metadata for doc in docs_list_out]
+        return ''.join(content), meta[0], meta[-1]
+
+    def get_parent_content_and_meta(vstore_docs, width, target):
+        target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+        parent_vstore_out = [vstore_docs[i] for i in target_range]
+
+        content_str_out, meta_first_out, meta_last_out = [], [], []
+        for _ in parent_vstore_out:
+            content_str, meta_first, meta_last = extract_details(parent_vstore_out)
+            content_str_out.append(content_str)
+            meta_first_out.append(meta_first)
+            meta_last_out.append(meta_last)
+        return content_str_out, meta_first_out, meta_last_out
+
+    def merge_dicts_except_source(d1, d2):
+        merged = {}
+        for key in d1:
+            if key != "source":
+                merged[key] = str(d1[key]) + " to " + str(d2[key])
+            else:
+                merged[key] = d1[key]  # or d2[key], based on preference
+        return merged
+
+    def merge_two_lists_of_dicts(list1, list2):
+        return [merge_dicts_except_source(d1, d2) for d1, d2 in zip(list1, list2)]
+
+    # Step 1: Filter vstore_docs
+    vstore_docs = get_docs_from_vstore(vectorstore)
+    print("Inside get_expanded_passages")
+    print("Docs:", docs)
+    print("Type of Docs:", type(docs))
+    print("Type of first element in Docs:", type(docs[0]))
+    print("Length of first tuple in Docs:", len(docs[0]))
+
+    doc_sources = {doc.metadata['source'] for doc, _ in docs}
+    vstore_docs = [(k, v) for k, v in vstore_docs if v.metadata.get('source') in doc_sources]
+
+    # Step 2: Group by source and proceed
+    vstore_by_source = defaultdict(list)
+    for k, v in vstore_docs:
+        vstore_by_source[v.metadata['source']].append((k, v))
+
+    expanded_docs = []
+    for doc, score in docs:
+        search_source = doc.metadata['source']
+        search_section = doc.metadata['page_section']
+        parent_vstore_meta_section = [doc.metadata['page_section'] for _, doc in vstore_by_source[search_source]]
+        search_index = parent_vstore_meta_section.index(search_section) if search_section in parent_vstore_meta_section else -1
+
+        content_str, meta_first, meta_last = get_parent_content_and_meta(vstore_by_source[search_source], width, search_index)
+        meta_full = merge_two_lists_of_dicts(meta_first, meta_last)
+
+        expanded_doc = (Document(page_content=content_str[0], metadata=meta_full[0]), score)
+        expanded_docs.append(expanded_doc)
+
+    doc_df = create_doc_df(expanded_docs)  # Assuming you've defined the 'create_doc_df' function elsewhere
+
+    return expanded_docs, doc_df
+
+
+def get_expanded_passages_orig(vectorstore, docs, width):
+
     """
     Extracts expanded passages based on given documents and a width for context.
 
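The rewritten get_expanded_passages above first filters and groups the vector store chunks by source document, so a passage is only expanded with neighbouring chunks from the same file, and the metadata of the first and last chunk in the expansion window is merged into ranges. A small runnable illustration of that metadata merge (the helper matches the one defined above; the values are made up):

def merge_dicts_except_source(d1, d2):
    merged = {}
    for key in d1:
        if key != "source":
            merged[key] = str(d1[key]) + " to " + str(d2[key])
        else:
            merged[key] = d1[key]
    return merged

first_chunk_meta = {"source": "Lambeth_2030-Our_Future_Our_Lambeth.pdf", "page": 4, "page_section": 11}
last_chunk_meta = {"source": "Lambeth_2030-Our_Future_Our_Lambeth.pdf", "page": 5, "page_section": 13}

print(merge_dicts_except_source(first_chunk_meta, last_chunk_meta))
# {'source': 'Lambeth_2030-Our_Future_Our_Lambeth.pdf', 'page': '4 to 5', 'page_section': '11 to 13'}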
@@ -410,6 +504,8 @@ def get_expanded_passages(vectorstore, docs, width):
     - expanded_docs: List of expanded Document objects.
     - doc_df: DataFrame representation of expanded_docs.
     """
+
+    from collections import defaultdict
 
     def get_docs_from_vstore(vectorstore):
         vector = vectorstore.docstore._dict
@@ -446,6 +542,7 @@ def get_expanded_passages(vectorstore, docs, width):
         return [merge_dicts_except_source(d1, d2) for d1, d2 in zip(list1, list2)]
 
     vstore_docs = get_docs_from_vstore(vectorstore)
+
     parent_vstore_meta_section = [doc.metadata['page_section'] for _, doc in vstore_docs]
 
     #print(docs)
@@ -522,6 +619,8 @@ def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prom
     #print("The final instruction prompt:")
     #print(instruction_prompt_out)
 
+    print('Final prompt is: ')
+    print(instruction_prompt_out)
 
     return instruction_prompt_out, sources_docs_content_string, new_question_kworded
 
@@ -725,11 +824,11 @@ def produce_streaming_answer_chatbot_hf(history, full_prompt):
     #print(full_prompt)
 
     # Get the model and tokenizer, and tokenize the user text.
-    model_inputs = tokenizer(text=full_prompt, return_tensors="pt").to(torch_device)
+    model_inputs = tokenizer(text=full_prompt, return_tensors="pt", return_attention_mask=False).to(torch_device) # return_attention_mask=False was added
 
     # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
     # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
-    streamer = TextIteratorStreamer(tokenizer, timeout=
+    streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
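For reference, the streamer change above follows the standard transformers pattern for non-blocking generation: model.generate runs on a background thread while the foreground iterates over the TextIteratorStreamer. A condensed, self-contained sketch of that pattern; the checkpoint matches the app's default hf_checkpoint, but the seq2seq loader, prompt and generation settings here are assumptions rather than the app's own code:

from threading import Thread
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextIteratorStreamer

checkpoint = "declare-lab/flan-alpaca-large"  # the app's default hf_checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

model_inputs = tokenizer(text="What are the 2030 outcomes for Lambeth?", return_tensors="pt")

# timeout stops the consuming thread hanging if generation fails; skip_prompt and
# skip_special_tokens keep the echoed prompt and special tokens out of the stream.
streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=256)

Thread(target=model.generate, kwargs=generate_kwargs).start()

partial = ""
for new_text in streamer:     # yields decoded text chunks as they are generated
    partial += new_text
    print(partial)            # the app would push this to the Gradio chatbot instead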