Sean-Case committed on
Commit
994ad90
·
1 Parent(s): 275393f

Upgraded large model to Mistral OpenOrca 7B Q4. More checks for empty questions.

Browse files
Files changed (2) hide show
  1. app.py +12 -14
  2. chatfuncs/chatfuncs.py +4 -4
app.py CHANGED
@@ -79,7 +79,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
79
  if torch_device is None:
80
  torch_device = chatf.torch_device
81
 
82
- if model_type == "Orca Mini (larger, slow)":
83
 
84
  gpu_config.update_gpu(gpu_layers)
85
  cpu_config.update_gpu(gpu_layers)
@@ -90,16 +90,14 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
90
  print(vars(cpu_config))
91
 
92
  try:
93
- #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
94
- model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
95
  #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
96
- #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
 
97
  except:
98
- #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
99
- model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
100
  #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
101
- #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
102
-
103
 
104
  tokenizer = []
105
 
@@ -138,7 +136,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
138
  return model_type, load_confirmation, model_type
139
 
140
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
141
- model_type = "Orca Mini (larger, slow)"
142
 
143
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
144
 
@@ -181,7 +179,7 @@ with block:
181
 
182
  gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
183
 
184
- gr.Markdown("Chat with PDF or web page documents. The default is a small model (Flan Alpaca), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative (Orca Mini (larger, slow)), can reason a little better, but is much slower (See Advanced tab).\n\nBy default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
185
 
186
  with gr.Row():
187
  current_source = gr.Textbox(label="Current data source(s)", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf", scale = 10)
@@ -197,7 +195,7 @@ with block:
197
 
198
  with gr.Row():
199
  message = gr.Textbox(
200
- label="What's your question?",
201
  lines=1,
202
  )
203
  with gr.Row():
@@ -231,14 +229,14 @@ with block:
231
  ingest_embed_out = gr.Textbox(label="File/webpage preparation progress")
232
 
233
  with gr.Tab("Advanced features"):
234
- model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca (small, fast)", choices = ["Flan Alpaca (small, fast)", "Orca Mini (larger, slow)"])
235
  with gr.Row():
236
- gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=6, step = 1, visible=False)
237
  change_model_button = gr.Button(value="Load model", scale=0)
238
  load_text = gr.Text(label="Load status")
239
 
240
  gr.HTML(
241
- "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
242
  )
243
 
244
  examples_set.change(fn=chatf.update_message, inputs=[examples_set], outputs=[message])
 
79
  if torch_device is None:
80
  torch_device = chatf.torch_device
81
 
82
+ if model_type == "Mistral Open Orca (larger, slow)":
83
 
84
  gpu_config.update_gpu(gpu_layers)
85
  cpu_config.update_gpu(gpu_layers)
 
90
  print(vars(cpu_config))
91
 
92
  try:
93
+ #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
 
94
  #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
95
+ model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='llama', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
96
+
97
  except:
98
+ #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
 
99
  #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
100
+ model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='llama', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
 
101
 
102
  tokenizer = []
103
 
 
136
  return model_type, load_confirmation, model_type
137
 
138
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
139
+ model_type = "Mistral Open Orca (larger, slow)"
140
 
141
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
142
 
 
179
 
180
  gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
181
 
182
+ gr.Markdown("Chat with PDF or web page documents. The default is a small model (Flan Alpaca), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative (Mistral Open Orca (larger, slow)), can reason a little better, but is much slower (See Advanced tab).\n\nBy default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
183
 
184
  with gr.Row():
185
  current_source = gr.Textbox(label="Current data source(s)", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf", scale = 10)
 
195
 
196
  with gr.Row():
197
  message = gr.Textbox(
198
+ label="Enter your question here.",
199
  lines=1,
200
  )
201
  with gr.Row():
 
229
  ingest_embed_out = gr.Textbox(label="File/webpage preparation progress")
230
 
231
  with gr.Tab("Advanced features"):
232
+ model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca (small, fast)", choices = ["Flan Alpaca (small, fast)", "Mistral Open Orca (larger, slow)"])
233
  with gr.Row():
234
+ gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=5, step = 1, visible=True)
235
  change_model_button = gr.Button(value="Load model", scale=0)
236
  load_text = gr.Text(label="Load status")
237
 
238
  gr.HTML(
239
+ "<center>This app is based on the models Flan Alpaca and Mistral Open Orca. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
240
  )
241
 
242
  examples_set.change(fn=chatf.update_message, inputs=[examples_set], outputs=[message])
chatfuncs/chatfuncs.py CHANGED
@@ -315,8 +315,8 @@ QUESTION: {question}
315
 
316
  if model_type == "Flan Alpaca (small, fast)":
317
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_alpaca, input_variables=['question', 'summaries'])
318
- elif model_type == "Orca Mini (larger, slow)":
319
- INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_wizard_orca, input_variables=['question', 'summaries'])
320
 
321
  return INSTRUCTION_PROMPT, CONTENT_PROMPT
322
 
@@ -360,7 +360,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt, content
360
  def create_full_prompt(user_input, history, extracted_memory, vectorstore, embeddings, model_type):
361
 
362
  if not user_input.strip():
363
- return history, "", ""
364
 
365
  #if chain_agent is None:
366
  # history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
@@ -434,7 +434,7 @@ def produce_streaming_answer_chatbot(history, full_prompt, model_type):
434
  print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
435
  print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
436
 
437
- elif model_type == "Orca Mini (larger, slow)":
438
  tokens = model.tokenize(full_prompt)
439
 
440
  gen_config = CtransGenGenerationConfig()
 
315
 
316
  if model_type == "Flan Alpaca (small, fast)":
317
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_alpaca, input_variables=['question', 'summaries'])
318
+ elif model_type == "Mistral Open Orca (larger, slow)":
319
+ INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_mistral_orca, input_variables=['question', 'summaries'])
320
 
321
  return INSTRUCTION_PROMPT, CONTENT_PROMPT
322
 
 
360
  def create_full_prompt(user_input, history, extracted_memory, vectorstore, embeddings, model_type):
361
 
362
  if not user_input.strip():
363
+ return history, "", "Respond with 'Please enter a question.' RESPONSE:"
364
 
365
  #if chain_agent is None:
366
  # history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
 
434
  print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
435
  print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
436
 
437
+ elif model_type == "Mistral Open Orca (larger, slow)":
438
  tokens = model.tokenize(full_prompt)
439
 
440
  gen_config = CtransGenGenerationConfig()