Sean-Case committed on
Commit aa0ad5d
1 Parent(s): 0b0054b

Cleaned up code a bit, added user icons, thumbs up/down

Files changed (6)
  1. Link to images.txt +4 -0
  2. app.py +9 -19
  3. bot.png +0 -0
  4. chatfuncs/chatfuncs.py +50 -148
  5. requirements.txt +2 -2
  6. user.jfif +0 -0
Link to images.txt ADDED
@@ -0,0 +1,4 @@
+ Robot emoji: https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Fluent_Emoji_high_contrast_1f916.svg/32px-Fluent_Emoji_high_contrast_1f916.svg.png
+
+ Bing smile emoji: https://www.bing.com/images/create/a-black-and-white-emoji-with-a-simple-smile2c-black/6523d2c320df409581e85bec80ef3ba8?id=KTdVbixG8oRqR9BzF6AblQ%3d%3d&view=detailv2&idpp=genimg&idpclose=1&FORM=SYDBIC
+
app.py CHANGED
@@ -65,35 +65,23 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
      print(docs_out)

      vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
-
-     '''
-     #with open("vectorstore.pkl", "wb") as f:
-     #pickle.dump(vectorstore, f)
-     '''
-
-     #if Path(save_to).exists():
-     # vectorstore_func.save_local(folder_path=save_to)
-     #else:
-     # os.mkdir(save_to)
-     # vectorstore_func.save_local(folder_path=save_to)
-
-     #global vectorstore

-     #vectorstore = vectorstore_func

      chatf.vectorstore = vectorstore_func

      out_message = "Document processing complete"

-     #print(out_message)
-     #print(f"> Saved to: {save_to}")
-
      return out_message, vectorstore_func

  # Gradio chat

  import gradio as gr

+ def vote(data: gr.LikeData):
+     if data.liked:
+         print("You upvoted this response: " + data.value)
+     else:
+         print("You downvoted this response: " + data.value)

  block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")

@@ -117,8 +105,8 @@ with block:
      with gr.Tab("Chatbot"):

          with gr.Row():
-             chat_height = 600
-             chatbot = gr.Chatbot(height=chat_height)
+             chat_height = 550
+             chatbot = gr.Chatbot(height=chat_height, avatar_images=('user.jfif', 'bot.jpg'), bubble_full_width = False)
              sources = gr.HTML(value = "Source paragraphs where I looked for answers will appear here", height=chat_height)

          with gr.Row():

@@ -194,6 +182,8 @@ with block:
      clear.click(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic])
      clear.click(lambda: None, None, chatbot, queue=False)

+     chatbot.like(vote, None, None)
+
  block.queue(concurrency_count=1).launch(debug=True)
  # -
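For reference, a minimal sketch of the FAISS indexing step that docs_to_faiss_save keeps (the commented-out pickle/save_local paths are what this hunk deletes). The embedding model named here is an assumption for illustration; the app supplies its own embeddings object.

```python
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Assumed embedding model for the sketch; app.py defines its own `embeddings`.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

docs_out = [Document(page_content="Example paragraph one."),
            Document(page_content="Example paragraph two.")]

# Build the in-memory index the same way the function above does, then query it.
vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
print(vectorstore_func.similarity_search("paragraph", k=1))
```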
 
bot.png ADDED
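Taken together, the app.py changes and the two new image files give the chatbot avatars plus a thumbs up/down handler. A minimal standalone sketch of that pattern is below; the echo respond function and the textbox wiring are placeholders, not code from this repo.

```python
import gradio as gr

def vote(data: gr.LikeData):
    # data.liked is True for a thumbs-up, False for a thumbs-down;
    # data.value is the text of the message that was rated.
    action = "upvoted" if data.liked else "downvoted"
    print(f"You {action} this response: {data.value}")

def respond(message, history):
    # Placeholder echo bot standing in for the real chat function.
    return "", history + [(message, f"You said: {message}")]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=550,
                         avatar_images=('user.jfif', 'bot.png'),  # (user, bot) image paths
                         bubble_full_width=False)
    msg = gr.Textbox()
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    chatbot.like(vote, None, None)  # thumbs up/down callback, as wired in this commit

demo.queue().launch()
```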
chatfuncs/chatfuncs.py CHANGED
@@ -12,9 +12,7 @@ from threading import Thread
  from transformers import AutoTokenizer, pipeline, TextIteratorStreamer

  # Alternative model sources
- from gpt4all import GPT4All
  from ctransformers import AutoModelForCausalLM#, AutoTokenizer
-
  from dataclasses import asdict, dataclass

  # Langchain functions

@@ -33,8 +31,6 @@ from nltk.tokenize import RegexpTokenizer
  from nltk.stem import WordNetLemmatizer
  import keybert

- #from transformers.pipelines import pipeline
-
  # For Name Entity Recognition model
  from span_marker import SpanMarkerModel

@@ -69,6 +65,7 @@ temperature: float = 0.1
  top_k: int = 3
  top_p: float = 1
  repetition_penalty: float = 1.05
+ flan_alpaca_repetition_penalty: float = 1.3
  last_n_tokens: int = 64
  max_new_tokens: int = 125
  #seed: int = 42

@@ -77,7 +74,7 @@ stream: bool = True
  threads: int = threads
  batch_size:int = 512
  context_length:int = 4096
- gpu_layers:int = 0#5#gpu_layers
+ gpu_layers:int = 0#5#gpu_layers For serving on Huggingface set to 0 as using free CPU instance
  sample = True

  @dataclass

@@ -99,7 +96,7 @@ class GenerationConfig:


  ## Highlight text constants
- hlt_chunk_size = 20
+ hlt_chunk_size = 15
  hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
  hlt_overlap = 0

@@ -110,51 +107,47 @@ ner_model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-mbert-base-mu
  # Used to pull out keywords from chat history to add to user queries behind the scenes
  kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")

-
+ ## Set model type ##
+ model_type = "ctrans"

  ## Chat models ##
- ctrans_llm = [] # Not leaded by default
- ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
-
- #ctokenizer = AutoTokenizer.from_pretrained(ctrans_llm)
-
- # Huggingface chat model
- #hf_checkpoint = 'jphme/phi-1_5_Wizard_Vicuna_uncensored'
- hf_checkpoint = 'declare-lab/flan-alpaca-large'
-
- def create_hf_model(model_name):
-
-     from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
-
-     # model_id = model_name
-
-     if torch_device == "cuda":
-         if "flan" in model_name:
-             model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
-         elif "mpt" in model_name:
-             model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto", trust_remote_code=True)
-         else:
-             model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
-     else:
-         if "flan" in model_name:
-             model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-         elif "mpt" in model_name:
-             model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-         else:
-             model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-
-     tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 2048)
-
-     return model, tokenizer, torch_device
-
- #model, tokenizer, torch_device = create_hf_model(model_name = hf_checkpoint)
+ if model_type == "ctrans":
+     ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
+     #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
+     #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
+
+ if model_type == "hf":
+     # Huggingface chat model
+     #hf_checkpoint = 'jphme/phi-1_5_Wizard_Vicuna_uncensored'
+     hf_checkpoint = 'declare-lab/flan-alpaca-large'
+
+     def create_hf_model(model_name):
+
+         from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
+
+         # model_id = model_name
+
+         if torch_device == "cuda":
+             if "flan" in model_name:
+                 model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+             elif "mpt" in model_name:
+                 model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto", trust_remote_code=True)
+             else:
+                 model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+         else:
+             if "flan" in model_name:
+                 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+             elif "mpt" in model_name:
+                 model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+             else:
+                 model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+
+         tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 2048)
+
+         return model, tokenizer, torch_device
+
+     model, tokenizer, torch_device = create_hf_model(model_name = hf_checkpoint)

  # Vectorstore funcs

@@ -439,7 +432,6 @@ def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_p

      return docs_keep_as_doc, doc_df, docs_keep_out

-
  def get_expanded_passages(vectorstore, docs, width):

      """

@@ -524,86 +516,6 @@ def get_expanded_passages(vectorstore, docs, width):

      return expanded_docs, doc_df

-
- def get_expanded_passages_orig(vectorstore, docs, width):
-
-     """
-     Extracts expanded passages based on given documents and a width for context.
-
-     Parameters:
-     - vectorstore: The primary data source.
-     - docs: List of documents to be expanded.
-     - width: Number of documents to expand around a given document for context.
-
-     Returns:
-     - expanded_docs: List of expanded Document objects.
-     - doc_df: DataFrame representation of expanded_docs.
-     """
-
-     from collections import defaultdict
-
-     def get_docs_from_vstore(vectorstore):
-         vector = vectorstore.docstore._dict
-         return list(vector.items())
-
-     def extract_details(docs_list):
-         docs_list_out = [tup[1] for tup in docs_list]
-         content = [doc.page_content for doc in docs_list_out]
-         meta = [doc.metadata for doc in docs_list_out]
-         return ''.join(content), meta[0], meta[-1]
-
-     def get_parent_content_and_meta(vstore_docs, width, target):
-         target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
-         parent_vstore_out = [vstore_docs[i] for i in target_range]
-
-         content_str_out, meta_first_out, meta_last_out = [], [], []
-         for _ in parent_vstore_out:
-             content_str, meta_first, meta_last = extract_details(parent_vstore_out)
-             content_str_out.append(content_str)
-             meta_first_out.append(meta_first)
-             meta_last_out.append(meta_last)
-         return content_str_out, meta_first_out, meta_last_out
-
-     def merge_dicts_except_source(d1, d2):
-         merged = {}
-         for key in d1:
-             if key != "source":
-                 merged[key] = str(d1[key]) + " to " + str(d2[key])
-             else:
-                 merged[key] = d1[key] # or d2[key], based on preference
-         return merged
-
-     def merge_two_lists_of_dicts(list1, list2):
-         return [merge_dicts_except_source(d1, d2) for d1, d2 in zip(list1, list2)]
-
-     vstore_docs = get_docs_from_vstore(vectorstore)
-
-     parent_vstore_meta_section = [doc.metadata['page_section'] for _, doc in vstore_docs]
-
-     #print(docs)
-
-     expanded_docs = []
-     for doc, score in docs:
-         search_section = doc.metadata['page_section']
-         search_index = parent_vstore_meta_section.index(search_section) if search_section in parent_vstore_meta_section else -1
-
-         content_str, meta_first, meta_last = get_parent_content_and_meta(vstore_docs, width, search_index)
-         #print("Meta first:")
-         #print(meta_first)
-         #print("Meta last:")
-         #print(meta_last)
-         #print("Meta last end.")
-         meta_full = merge_two_lists_of_dicts(meta_first, meta_last)
-
-         #print(meta_full)
-
-         expanded_doc = (Document(page_content=content_str[0], metadata=meta_full[0]), score)
-         expanded_docs.append(expanded_doc)
-
-     doc_df = create_doc_df(expanded_docs) # Assuming you've defined the 'create_doc_df' function elsewhere
-
-     return expanded_docs, doc_df
-
  def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,

      question = inputs["question"]

@@ -838,18 +750,6 @@ def highlight_found_text(search_text: str, full_text: str, hlt_chunk_size:int=hl
      return "".join(pos_tokens)

  # # Chat functions
- def produce_streaming_answer_chatbot_gpt4all(history, full_prompt):
-
-     print("The question is: ")
-     print(full_prompt)
-
-     # Pull the generated text from the streamer, and update the model output.
-     history[-1][1] = ""
-     for new_text in gpt4all_model.generate(full_prompt, max_tokens=2000, streaming=True):
-         if new_text == None: new_text = ""
-         history[-1][1] += new_text
-         yield history
-
  def produce_streaming_answer_chatbot_hf(history, full_prompt):

      #print("The question is: ")

@@ -866,7 +766,7 @@ def produce_streaming_answer_chatbot_hf(history, full_prompt):
          streamer=streamer,
          max_new_tokens=max_new_tokens,
          do_sample=sample,
-         repetition_penalty=1.3,
+         repetition_penalty=flan_alpaca_repetition_penalty,
          top_p=top_p,
          temperature=temperature,
          top_k=top_k

@@ -902,26 +802,28 @@ def produce_streaming_answer_chatbot_ctrans(history, full_prompt):

      tokens = ctrans_llm.tokenize(full_prompt)

-     #import psutil
-     #from loguru import logger
-
-     #_ = [elm for elm in full_prompt.splitlines() if elm.strip()]
-     #stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
-     #print(stop_string)
-
-     #logger.debug(f"{stop_string=} not used")
-
-     #_ = psutil.cpu_count(logical=False) - 1
-     #cpu_count: int = int(_) if _ else 1
-     #logger.debug(f"{cpu_count=}")
+     #config = GenerationConfig(reset=True)

      # Pull the generated text from the streamer, and update the model output.
-     #config = GenerationConfig(reset=True)
+     import time
+     start = time.time()
+     NUM_TOKENS=0
+     print('-'*4+'Start Generation'+'-'*4)
+
      history[-1][1] = ""
      for new_text in ctrans_llm.generate(tokens, top_k=top_k, temperature=temperature, repetition_penalty=repetition_penalty): #ctrans_generate(prompt=tokens, config=config):
          if new_text == None: new_text = ""
          history[-1][1] += ctrans_llm.detokenize(new_text) #new_text
+         NUM_TOKENS+=1
          yield history
+
+     time_generate = time.time() - start
+     print('\n')
+     print('-'*4+'End Generation'+'-'*4)
+     print(f'Num of generated tokens: {NUM_TOKENS}')
+     print(f'Time for complete generation: {time_generate}s')
+     print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
+     print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')


  def ctrans_generate(
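For the "ctrans" branch, a compact sketch of loading the same Orca Mini 3B GGUF file with ctransformers and streaming tokens with the throughput bookkeeping added to produce_streaming_answer_chatbot_ctrans; the prompt format and thread count are assumptions, and the dataclass below is a cut-down stand-in for the module's GenerationConfig.

```python
import time
from dataclasses import asdict, dataclass
from ctransformers import AutoModelForCausalLM

@dataclass
class GenerationConfig:
    # Stand-in config mirroring the constants defined above.
    temperature: float = 0.1
    top_k: int = 3
    top_p: float = 1
    repetition_penalty: float = 1.05
    max_new_tokens: int = 125
    threads: int = 8           # assumed CPU thread count
    context_length: int = 4096
    gpu_layers: int = 0        # CPU-only, as on the free Huggingface instance
    stream: bool = True

llm = AutoModelForCausalLM.from_pretrained(
    'juanjgit/orca_mini_3B-GGUF', model_type='llama',
    model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))

prompt = "### User: Say hello.\n### Response:"   # assumed prompt format
tokens = llm.tokenize(prompt)

start, num_tokens, text = time.time(), 0, ""
for tok in llm.generate(tokens, top_k=3, temperature=0.1, repetition_penalty=1.05):
    text += llm.detokenize(tok)   # detokenize each streamed token id
    num_tokens += 1
elapsed = time.time() - start
print(text)
print(f"{num_tokens} tokens in {elapsed:.1f}s ({num_tokens / elapsed:.1f} tokens/s)")
```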
 
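And for the "hf" branch, a sketch of where the renamed flan_alpaca_repetition_penalty value lands: generation runs on a worker thread while tokens are consumed from a TextIteratorStreamer, mirroring produce_streaming_answer_chatbot_hf. The prompt is a placeholder and the model is loaded CPU-only for simplicity.

```python
from threading import Thread
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextIteratorStreamer

checkpoint = 'declare-lab/flan-alpaca-large'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, model_max_length=2048)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

prompt = "Explain why vector search helps question answering."  # placeholder prompt
inputs = tokenizer(prompt, return_tensors="pt")

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=125,
    do_sample=True,
    repetition_penalty=1.3,  # i.e. the new flan_alpaca_repetition_penalty constant
    top_p=1,
    temperature=0.1,
    top_k=3,
)

# Generate on a background thread; stream partial text on the main thread.
Thread(target=model.generate, kwargs=generate_kwargs).start()
answer = ""
for new_text in streamer:
    answer += new_text
print(answer)
```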
requirements.txt CHANGED
@@ -13,8 +13,8 @@ bitsandbytes
  accelerate
  optimum
  pypdf
- gradio
- gradio_client==0.2.7
+ gradio==3.47.1
+ gradio_client==0.6.0
  python-docx
  gpt4all
  ctransformers[cuda]
 
user.jfif ADDED
Binary file (53.4 kB)