Sean-Case committed on
Commit d213c15
Parent: d5a8385

Improved advanced model prompt, added a stop generation button, and improved the context prompt.

Files changed (3):
  1. app.py +7 -5
  2. chatfuncs/chatfuncs.py +93 -42
  3. chatfuncs/ingest.py +1 -1
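
The commit message mentions a stop generation button, but the wiring for it is not part of the hunks shown below. A minimal sketch of the usual Gradio pattern for cancelling a streaming chat event follows; the names chat_fn, msg, chatbot and stop_button are illustrative assumptions, not the repository's actual code.

import gradio as gr

# Hypothetical sketch: cancel an in-flight streaming generation with a button.
def chat_fn(message, history):
    history = (history or []) + [(message, "")]
    for token in ["example ", "streamed ", "answer"]:  # stand-in for a model token stream
        history[-1] = (message, history[-1][1] + token)
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    stop_button = gr.Button("Stop generation")

    # Keep a handle on the streaming event so the button can cancel it.
    submit_event = msg.submit(chat_fn, [msg, chatbot], chatbot)
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])

demo.queue()   # queueing is required for streaming output and event cancellation
# demo.launch()
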
app.py CHANGED
@@ -90,12 +90,14 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
     print(vars(cpu_config))

     try:
-        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
-        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Sheared-LLaMA-1.3B-gguf', model_type='llama', model_file='q8_0-sheared-llama-1.3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
         #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
     except:
-        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
-        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Sheared-LLaMA-1.3B-gguf', model_type='llama', model_file='q8_0-sheared-llama-1.3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
+        model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
+        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
         #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())


@@ -228,7 +230,7 @@ with block:
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
         with gr.Row():
-            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=6, step = 1, visible=True)
+            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=6, step = 1, visible=False)
            change_model_button = gr.Button(value="Load model", scale=0)
        load_text = gr.Text(label="Load status")

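The try/except above attempts to load the GGUF model with the GPU settings and falls back to the CPU settings if that fails. A minimal sketch of the same pattern, assuming a simple config dataclass in place of the app's gpu_config/cpu_config objects (the exact fields here are assumptions, not the app's real config classes):

from dataclasses import dataclass
from ctransformers import AutoModelForCausalLM

@dataclass
class RunConfig:  # hypothetical stand-in for the app's config objects
    temperature: float = 0.1
    context_length: int = 2048
    gpu_layers: int = 0  # 0 = CPU only

gpu_config = RunConfig(gpu_layers=5)
cpu_config = RunConfig(gpu_layers=0)

def load_gguf_model(repo='Aryanne/Orca-Mini-3B-gguf', model_file='q5_0-orca-mini-3b.gguf'):
    try:
        # Try GPU settings first; **vars(...) unpacks the dataclass into keyword arguments.
        return AutoModelForCausalLM.from_pretrained(repo, model_type='llama',
                                                    model_file=model_file, **vars(gpu_config))
    except Exception:
        # Fall back to CPU-only settings if GPU-accelerated loading is unavailable.
        return AutoModelForCausalLM.from_pretrained(repo, model_type='llama',
                                                    model_file=model_file, **vars(cpu_config))
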
chatfuncs/chatfuncs.py CHANGED
@@ -12,7 +12,7 @@ from threading import Thread
 from transformers import pipeline, TextIteratorStreamer

 # Alternative model sources
-from dataclasses import asdict, dataclass
+#from dataclasses import asdict, dataclass

 # Langchain functions
 from langchain.prompts import PromptTemplate
@@ -55,8 +55,8 @@ model = [] # Define empty list for model functions to run
 tokenizer = [] # Define empty list for model functions to run

 ## Highlight text constants
-hlt_chunk_size = 15
-hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
+hlt_chunk_size = 12
+hlt_strat = [" ", ". ", "! ", "? ", ": ", "\n\n", "\n", ", "]
 hlt_overlap = 4

 ## Initialise NER model ##
@@ -217,58 +217,106 @@ def base_prompt_templates(model_type = "Flan Alpaca"):
 # The main prompt:

 instruction_prompt_template_alpaca_quote = """### Instruction:
 Quote directly from the SOURCE below that best answers the QUESTION. Only quote full sentences in the correct order. If you cannot find an answer, start your response with "My best guess is: ".

 CONTENT: {summaries}
-
 QUESTION: {question}

 Response:"""

 instruction_prompt_template_alpaca = """### Instruction:
 ### User:
 Answer the QUESTION using information from the following CONTENT.
 CONTENT: {summaries}
 QUESTION: {question}

 Response:"""

-instruction_prompt_template_sheared_llama = """Answer the QUESTION using information from the following CONTENT.
-CONTENT: {summaries}
-QUESTION: {question}
-
-Answer:"""
+instruction_prompt_template_openllama = """Answer the QUESTION using information from the following CONTENT.
+QUESTION - {question}
+CONTENT - {summaries}
+Answer:"""
+
+instruction_prompt_template_platypus = """### Instruction:
+Answer the QUESTION using information from the following CONTENT.
+CONTENT: {summaries}
+QUESTION: {question}
+### Response:"""
+
+instruction_prompt_template_wizard_orca_quote = """### HUMAN:
+Quote text from the CONTENT to answer the QUESTION below.
+CONTENT - {summaries}
+QUESTION - {question}
+### RESPONSE:
+"""
+
+instruction_prompt_template_wizard_orca = """### HUMAN:
+Answer the QUESTION below based on the CONTENT. Only refer to CONTENT that directly answers the question.
+CONTENT - {summaries}
+QUESTION - {question}
+### RESPONSE:
+"""

 instruction_prompt_template_orca = """
 ### System:
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 ### User:
 Answer the QUESTION with a short response using information from the following CONTENT.
-CONTENT: {summaries}
 QUESTION: {question}
+CONTENT: {summaries}

 ### Response:"""
+
+instruction_prompt_template_orca_quote = """
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+### User:
+Quote text from the CONTENT to answer the QUESTION below.
+QUESTION: {question}
+CONTENT: {summaries}
+### Response:
+"""
+
+instruction_prompt_template_orca_rev = """
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+### User:
+Answer the QUESTION with a short response using information from the following CONTENT.
+QUESTION: {question}
+CONTENT: {summaries}
+
+### Response:"""

 instruction_prompt_mistral_orca = """<|im_start|>system\n
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 <|im_start|>user\n
 Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.
 CONTENT: {summaries}
 QUESTION: {question}\n
 Answer:<|im_end|>"""

 instruction_prompt_tinyllama_orca = """<|im_start|>system\n
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 <|im_start|>user\n
 Answer the QUESTION using information from the following CONTENT. Only quote text that directly answers the question and nothing more. If you can't find an answer to the question, respond with "Sorry, I can't find an answer to that question.".
 CONTENT: {summaries}
 QUESTION: {question}\n
 Answer:<|im_end|>"""
+
+instruction_prompt_marx = """
+### HUMAN:
+Answer the QUESTION using information from the following CONTENT.
+CONTENT: {summaries}
+QUESTION: {question}
+
+### RESPONSE:
+"""

 if model_type == "Flan Alpaca":
     INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_alpaca, input_variables=['question', 'summaries'])
 elif model_type == "Orca Mini":
-    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_orca, input_variables=['question', 'summaries'])
+    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_wizard_orca, input_variables=['question', 'summaries'])

 return INSTRUCTION_PROMPT, CONTENT_PROMPT

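The hunk above points the "Orca Mini" option at the new wizard_orca template. For reference, a short sketch of how such a LangChain PromptTemplate gets filled in at query time; the question and summaries values here are invented purely for illustration:

from langchain.prompts import PromptTemplate

instruction_prompt_template_wizard_orca = """### HUMAN:
Answer the QUESTION below based on the CONTENT. Only refer to CONTENT that directly answers the question.
CONTENT - {summaries}
QUESTION - {question}
### RESPONSE:
"""

INSTRUCTION_PROMPT = PromptTemplate(template=instruction_prompt_template_wizard_orca,
                                    input_variables=['question', 'summaries'])

# Example fill with made-up retrieval output:
prompt_text = INSTRUCTION_PROMPT.format(
    question="When did the centre open?",
    summaries="The centre opened to the public on 3 May 2021.")
print(prompt_text)
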
@@ -281,7 +329,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt, content
 new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,


-docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 5, out_passages = 1,
+docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 10, out_passages = 2,
                                                            vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
                                                            #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])

@@ -382,6 +430,8 @@ def produce_streaming_answer_chatbot(history, full_prompt, model_type):

 gen_config = CtransGenGenerationConfig()

+print(vars(gen_config))
+
 # Pull the generated text from the streamer, and update the model output.
 start = time.time()
 NUM_TOKENS=0
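
The added print(vars(gen_config)) simply logs the active generation settings before streaming starts. As a rough sketch, a ctransformers model can stream text with such settings passed as keyword arguments; the dataclass fields below are assumptions and do not reproduce the repository's actual CtransGenGenerationConfig:

from dataclasses import dataclass

@dataclass
class GenConfig:  # hypothetical stand-in for CtransGenGenerationConfig
    temperature: float = 0.1
    top_k: int = 40
    top_p: float = 0.95
    repetition_penalty: float = 1.1
    max_new_tokens: int = 256

gen_config = GenConfig()
print(vars(gen_config))  # e.g. {'temperature': 0.1, 'top_k': 40, ...}

# Assuming `model` is a ctransformers model loaded as in app.py:
# for new_text in model(full_prompt, stream=True, **vars(gen_config)):
#     print(new_text, end="")
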
@@ -633,7 +683,8 @@ def get_expanded_passages(vectorstore, docs, width):
 return ''.join(content), meta[0], meta[-1]

 def get_parent_content_and_meta(vstore_docs, width, target):
-    target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+    #target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+    target_range = range(max(0, target), min(len(vstore_docs), target + width + 1)) # Now only selects extra passages AFTER the found passage
     parent_vstore_out = [vstore_docs[i] for i in target_range]

     content_str_out, meta_first_out, meta_last_out = [], [], []
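
The change above narrows the expansion window so that only the matched passage and the passages after it are returned, rather than passages on both sides. A quick illustration with a toy document list:

vstore_docs = ["p0", "p1", "p2", "p3", "p4", "p5"]
width, target = 2, 3

old_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
new_range = range(max(0, target), min(len(vstore_docs), target + width + 1))

print([vstore_docs[i] for i in old_range])  # ['p1', 'p2', 'p3', 'p4', 'p5'] (passages on either side)
print([vstore_docs[i] for i in new_range])  # ['p3', 'p4', 'p5'] (matched passage plus following ones)
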
chatfuncs/ingest.py CHANGED
@@ -38,7 +38,7 @@ from pypdf import PdfReader
 PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
 # -

-split_strat = ["\n\n", "\n", ".", "!", "?", ","]
+split_strat = ["\n\n", "\n", ". ", "! ", "? "]
 chunk_size = 500
 chunk_overlap = 0
 start_index = True
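
The new split_strat drops the comma separator and only breaks on sentence punctuation followed by a space, so chunks are less likely to split inside decimals such as "3.1". The splitter call itself is not part of this diff; the sketch below assumes LangChain's RecursiveCharacterTextSplitter and uses a small chunk_size purely so the split is visible:

from langchain.text_splitter import RecursiveCharacterTextSplitter

split_strat = ["\n\n", "\n", ". ", "! ", "? "]

text_splitter = RecursiveCharacterTextSplitter(
    separators=split_strat,
    chunk_size=60,         # the app uses chunk_size = 500; 60 here just forces a visible split
    chunk_overlap=0,
    add_start_index=True,  # with create_documents, records each chunk's start position in its metadata
)

docs = text_splitter.create_documents([
    "Section 3.1 covers access. The building opened in 2021! Opening hours vary.\n\n"
    "Contact the desk for details."])
print([(d.metadata["start_index"], d.page_content) for d in docs])
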