Sean-Case committed
Commit f6036ad
Parent: 26e86cf

Tidied up intro text

Files changed (2)
  1. app.py +3 -3
  2. chatfuncs/chatfuncs.py +104 -5
app.py CHANGED
@@ -111,7 +111,7 @@ with block:
 
     gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
 
-    gr.Markdown("Chat with a document (alpha). This is a small model, that can only answer specific questions that are answered in the text. It cannot give overall impressions of or summarise the document. By default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page (feature temporarily disabled), please select below. The chatbot will not answer questions where answered can't be found on the website. If switching topic, please click the 'New topic' button as the bot will assume follow up questions are linked to the first.\n\nWarnings: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
+    gr.Markdown("Chat with a document (alpha). This is a small model, that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. By default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nWarnings: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
 
     current_source = gr.Textbox(label="Current data source that is loaded into the app", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf")
 
@@ -137,8 +137,8 @@ with block:
                     "What are the 2030 outcomes for Lambeth?"])
 
     with gr.Row():
-        current_topic = gr.Textbox(label="Keywords related to current conversation topic. If you want to talk about something else, press 'New topic'", placeholder="Keywords related to the conversation topic will appear here")
-        clear = gr.Button(value="New topic", variant="secondary", scale=0)
+        current_topic = gr.Textbox(label="Note: Feature currently disabled - Keywords related to current conversation topic. If you want to talk about something else, press 'New topic'", placeholder="Keywords related to the conversation topic will appear here")
+        clear = gr.Button(value="Clear chat", variant="secondary", scale=0)
 
 
     with gr.Tab("Load in a different PDF file or web page to chat"):
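Note: the click wiring for the renamed button is not shown in this commit. A minimal Gradio sketch, assuming the standard Blocks API, of how a 'Clear chat' button can reset the chatbot history and the keyword textbox (the handler and component set here are illustrative, not taken from app.py):

# Sketch only: hypothetical 'Clear chat' wiring in a Gradio Blocks app.
import gradio as gr

with gr.Blocks() as block:
    chatbot = gr.Chatbot()
    current_topic = gr.Textbox(label="Keywords related to current conversation topic")
    clear = gr.Button(value="Clear chat", variant="secondary", scale=0)

    # Returning empty values resets both components when the button is clicked.
    clear.click(lambda: (None, ""), inputs=None, outputs=[chatbot, current_topic])

block.launch()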
chatfuncs/chatfuncs.py CHANGED
@@ -46,7 +46,7 @@ import gradio as gr
 
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Running on device:", torch_device)
-threads = torch.get_num_threads()
+threads = 8 #torch.get_num_threads()
 print("CPU threads:", threads)
 
 PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
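For reference, PyTorch only honours a fixed thread count if it is passed back via torch.set_num_threads; a minimal sketch, not part of this commit:

# Sketch only: applying a fixed CPU thread count in PyTorch.
import torch

threads = 8  # fixed value, as above; torch.get_num_threads() would return the default instead
torch.set_num_threads(threads)
print("CPU threads:", torch.get_num_threads())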
@@ -73,7 +73,7 @@ threads: int = threads
 batch_size:int = 512
 context_length:int = 2048
 gpu_layers:int = 0
-sample = False
+sample = True
 
 ## Highlight text constants
 hlt_chunk_size = 20
@@ -91,9 +91,16 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL
 ctrans_llm = [] # Not leaded by default
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/orca_mini_3B-GGML', model_type='llama', model_file='orca-mini-3b.ggmlv3.q4_0.bin')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/orca_mini_3B-GGML', model_type='llama', model_file='orca-mini-3b.ggmlv3.q8_0.bin')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf')
+
 #gpt4all_model = GPT4All(model_name= "orca-mini-3b.ggmlv3.q4_0.bin", model_path="models/") # "ggml-mpt-7b-chat.bin"
 
 # Huggingface chat model
+#hf_checkpoint = 'jphme/phi-1_5_Wizard_Vicuna_uncensored'
 hf_checkpoint = 'declare-lab/flan-alpaca-large'
 
 def create_hf_model(model_name):
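The commented-out ctrans_llm lines above all follow the same ctransformers loading pattern. A minimal sketch using the Mistral-7B-Instruct checkpoint named above; the gpu_layers and context_length values here are illustrative assumptions, not settings taken from this file:

# Sketch only: loading a quantised GGUF checkpoint with ctransformers.
from ctransformers import AutoModelForCausalLM

ctrans_llm = AutoModelForCausalLM.from_pretrained(
    'TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
    model_type='mistral',
    model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    gpu_layers=0,         # CPU-only; assumption for this sketch
    context_length=2048,  # assumption for this sketch
)

# The loaded model can be called directly with a prompt string.
print(ctrans_llm("Briefly, what is a vector store?", max_new_tokens=64))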
@@ -115,7 +122,7 @@ def create_hf_model(model_name):
     elif "mpt" in model_name:
         model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
     else:
-        model = AutoModelForCausalLM.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 2048)
 
@@ -397,7 +404,94 @@ def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_p
 
     return docs_keep_as_doc, doc_df, docs_keep_out
 
+
 def get_expanded_passages(vectorstore, docs, width):
+
+    """
+    Extracts expanded passages based on given documents and a width for context.
+
+    Parameters:
+    - vectorstore: The primary data source.
+    - docs: List of documents to be expanded.
+    - width: Number of documents to expand around a given document for context.
+
+    Returns:
+    - expanded_docs: List of expanded Document objects.
+    - doc_df: DataFrame representation of expanded_docs.
+    """
+
+    from collections import defaultdict
+
+    def get_docs_from_vstore(vectorstore):
+        vector = vectorstore.docstore._dict
+        return list(vector.items())
+
+    def extract_details(docs_list):
+        docs_list_out = [tup[1] for tup in docs_list]
+        content = [doc.page_content for doc in docs_list_out]
+        meta = [doc.metadata for doc in docs_list_out]
+        return ''.join(content), meta[0], meta[-1]
+
+    def get_parent_content_and_meta(vstore_docs, width, target):
+        target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+        parent_vstore_out = [vstore_docs[i] for i in target_range]
+
+        content_str_out, meta_first_out, meta_last_out = [], [], []
+        for _ in parent_vstore_out:
+            content_str, meta_first, meta_last = extract_details(parent_vstore_out)
+            content_str_out.append(content_str)
+            meta_first_out.append(meta_first)
+            meta_last_out.append(meta_last)
+        return content_str_out, meta_first_out, meta_last_out
+
+    def merge_dicts_except_source(d1, d2):
+        merged = {}
+        for key in d1:
+            if key != "source":
+                merged[key] = str(d1[key]) + " to " + str(d2[key])
+            else:
+                merged[key] = d1[key] # or d2[key], based on preference
+        return merged
+
+    def merge_two_lists_of_dicts(list1, list2):
+        return [merge_dicts_except_source(d1, d2) for d1, d2 in zip(list1, list2)]
+
+    # Step 1: Filter vstore_docs
+    vstore_docs = get_docs_from_vstore(vectorstore)
+    print("Inside get_expanded_passages")
+    print("Docs:", docs)
+    print("Type of Docs:", type(docs))
+    print("Type of first element in Docs:", type(docs[0]))
+    print("Length of first tuple in Docs:", len(docs[0]))
+
+    doc_sources = {doc.metadata['source'] for doc, _ in docs}
+    vstore_docs = [(k, v) for k, v in vstore_docs if v.metadata.get('source') in doc_sources]
+
+    # Step 2: Group by source and proceed
+    vstore_by_source = defaultdict(list)
+    for k, v in vstore_docs:
+        vstore_by_source[v.metadata['source']].append((k, v))
+
+    expanded_docs = []
+    for doc, score in docs:
+        search_source = doc.metadata['source']
+        search_section = doc.metadata['page_section']
+        parent_vstore_meta_section = [doc.metadata['page_section'] for _, doc in vstore_by_source[search_source]]
+        search_index = parent_vstore_meta_section.index(search_section) if search_section in parent_vstore_meta_section else -1
+
+        content_str, meta_first, meta_last = get_parent_content_and_meta(vstore_by_source[search_source], width, search_index)
+        meta_full = merge_two_lists_of_dicts(meta_first, meta_last)
+
+        expanded_doc = (Document(page_content=content_str[0], metadata=meta_full[0]), score)
+        expanded_docs.append(expanded_doc)
+
+    doc_df = create_doc_df(expanded_docs) # Assuming you've defined the 'create_doc_df' function elsewhere
+
+    return expanded_docs, doc_df
+
+
+def get_expanded_passages_orig(vectorstore, docs, width):
+
     """
     Extracts expanded passages based on given documents and a width for context.
 
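A rough usage sketch for the rewritten function above, assuming a LangChain FAISS vectorstore whose documents carry 'source' and 'page_section' metadata; the query, k and width values are illustrative:

# Sketch only: calling get_expanded_passages with (Document, score) tuples.
docs_with_scores = vectorstore.similarity_search_with_score(
    "What are the 2030 outcomes for Lambeth?", k=3)

# width=1 pulls in one neighbouring section either side of each hit.
expanded_docs, doc_df = get_expanded_passages(vectorstore, docs_with_scores, width=1)

for doc, score in expanded_docs:
    print(score, doc.metadata.get("page_section"), doc.page_content[:80])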
 
@@ -410,6 +504,8 @@ def get_expanded_passages(vectorstore, docs, width):
     - expanded_docs: List of expanded Document objects.
     - doc_df: DataFrame representation of expanded_docs.
     """
+
+    from collections import defaultdict
 
     def get_docs_from_vstore(vectorstore):
         vector = vectorstore.docstore._dict
@@ -446,6 +542,7 @@ def get_expanded_passages(vectorstore, docs, width):
         return [merge_dicts_except_source(d1, d2) for d1, d2 in zip(list1, list2)]
 
     vstore_docs = get_docs_from_vstore(vectorstore)
+
     parent_vstore_meta_section = [doc.metadata['page_section'] for _, doc in vstore_docs]
 
     #print(docs)
@@ -522,6 +619,8 @@ def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prom
     #print("The final instruction prompt:")
     #print(instruction_prompt_out)
 
+    print('Final prompt is: ')
+    print(instruction_prompt_out)
 
     return instruction_prompt_out, sources_docs_content_string, new_question_kworded
 
@@ -725,11 +824,11 @@ def produce_streaming_answer_chatbot_hf(history, full_prompt):
     #print(full_prompt)
 
     # Get the model and tokenizer, and tokenize the user text.
-    model_inputs = tokenizer(text=full_prompt, return_tensors="pt").to(torch_device)
+    model_inputs = tokenizer(text=full_prompt, return_tensors="pt", return_attention_mask=False).to(torch_device) # return_attention_mask=False was added
 
     # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
     # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
-    streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)
+    streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
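For context, the hunk above relies on the transformers streaming pattern: model.generate runs on a background thread while the main thread iterates over the streamer. A minimal sketch using the flan-alpaca checkpoint named earlier; the prompt and generation settings are illustrative:

# Sketch only: TextIteratorStreamer with generation on a background thread.
from threading import Thread
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextIteratorStreamer

checkpoint = "declare-lab/flan-alpaca-large"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

model_inputs = tokenizer(text="What are the 2030 outcomes for Lambeth?", return_tensors="pt")

# The timeout stops the main thread waiting forever if the generation thread dies.
streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=128)

Thread(target=model.generate, kwargs=generate_kwargs).start()

for new_text in streamer:
    print(new_text, end="", flush=True)  # tokens arrive as they are generated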
 