eliujl committed
Commit 11c3099
1 Parent(s): ac1251c

Updated local LLM support


Added Mixtral model support. Corrected local LLM model_path.

Files changed (1)
1. app.py: +24 -11
app.py CHANGED
@@ -30,6 +30,8 @@ local_model_tuples = [
     (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
     (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+    (5, 'mixtral', "TheBloke/Mixtral-8x7B-v0.1-GGUF", "mixtral-8x7b-v0.1.Q8_0.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF"),
+    (6, 'mixtral_inst', "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF", "mixtral-8x7b-instruct-v0.1.Q2_K.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"),
 ]
 local_model_names = [t[1] for t in local_model_tuples]
 langchain.verbose = False
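
For reference, each row in local_model_tuples follows the layout (id, short name, Hugging Face repo id, GGUF filename, prompt family, repo link), which is how use_local_llm unpacks it in the next hunk. A minimal sketch of resolving one of the new Mixtral entries by its short name; only the two rows added in this commit are repeated here, nothing else is assumed beyond the table above:

local_model_tuples = [
    # Trimmed to the two entries added in this commit; layout matches the full list above.
    (5, 'mixtral', "TheBloke/Mixtral-8x7B-v0.1-GGUF", "mixtral-8x7b-v0.1.Q8_0.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF"),
    (6, 'mixtral_inst', "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF", "mixtral-8x7b-instruct-v0.1.Q2_K.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"),
]
local_model_names = [t[1] for t in local_model_tuples]

entry = local_model_names.index('mixtral_inst')
model_id, local_model_name, model_name, model_file, model_type, model_link = local_model_tuples[entry]
print(model_name)   # TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF
print(model_file)   # mixtral-8x7b-instruct-v0.1.Q2_K.gguf
print(model_type)   # 'mixtral' -> selects the new Mixtral prompt branch in setup_prompt
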
@@ -162,28 +164,33 @@ def use_local_llm(r_llm, local_llm_path):
     model_id, local_model_name, model_name, model_file, model_type, model_link = local_model_tuples[entry]
     model_path = os.path.join( local_llm_path, model_name, model_file )
     model_path = os.path.normpath( model_path )
+    model_dir = os.path.join( local_llm_path, model_name )
+    model_dir = os.path.normpath( model_dir )
     if not os.path.exists(model_path):
         print("model not existing at ", model_path, "\n")
         model_path = hf_hub_download(repo_id=model_name, filename=model_file, repo_type="model",
                                      #cache_dir=local_llm_path,
-                                     local_dir=local_llm_path, local_dir_use_symlinks=False)
+                                     #local_dir=local_llm_path,
+                                     local_dir=model_dir,
+                                     local_dir_use_symlinks=False)
         print("\n model downloaded at path=",model_path)
     else:
         print("model existing at ", model_path)
 
     llm = LlamaCpp(
         model_path=model_path,
-        temperature=0.0,
-        n_batch=300,
+        # temperature=0.0,
+        # n_batch=300,
         n_ctx=4000,
         max_tokens=2000,
-        n_gpu_layers=10,
-        n_threads=12,
-        top_p=1,
-        repeat_penalty=1.15,
-        verbose=False,
-        callback_manager=callback_manager,
-        streaming=True,
+        # n_gpu_layers=10,
+        # n_threads=12,
+        # top_p=1,
+        # repeat_penalty=1.15,
+        # verbose=False,
+        # callback_manager=callback_manager,
+        # streaming=True,
+        # chat_format="llama-2",
         # verbose=True, # Verbose is required to pass to the callback manager
     )
     return llm
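
The "corrected local LLM model_path" part of the commit is visible in the paths: previously hf_hub_download was given local_dir=local_llm_path, which places the GGUF file directly under that folder, while the os.path.exists check looks under local_llm_path/<repo id>/<file>; downloading into the new per-repo model_dir makes the two agree. A rough, path-only sketch, with illustrative directory names and the hf_hub_download call left as a comment mirroring the arguments above:

import os

local_llm_path = "local_models"                               # illustrative root folder
model_name = "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"       # repo id from the table above
model_file = "mixtral-8x7b-instruct-v0.1.Q2_K.gguf"

# Where the existence check looks (unchanged by this commit):
model_path = os.path.normpath(os.path.join(local_llm_path, model_name, model_file))
print(model_path)   # local_models/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf

# Old behavior: local_dir=local_llm_path placed the file at
# local_models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf, which never matches model_path.

# New behavior: download into a per-repo directory so the file lands where model_path points.
model_dir = os.path.normpath(os.path.join(local_llm_path, model_name))
# model_path = hf_hub_download(repo_id=model_name, filename=model_file, repo_type="model",
#                              local_dir=model_dir, local_dir_use_symlinks=False)

Since hf_hub_download returns the downloaded file's path, the old code still worked within a single run; the correction appears to matter for recognizing an already-downloaded file on later runs instead of fetching it again.
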
@@ -193,6 +200,7 @@ def setup_prompt(r_llm):
     B_INST, E_INST = "[INST]", "[/INST]"
     B_SYS_LLAMA, E_SYS_LLAMA = "<<SYS>>\n", "\n<</SYS>>\n\n"
     B_SYS_MIS, E_SYS_MIS = "<s> ", "</s> "
+    B_SYS_MIXTRAL, E_SYS_MIXTRAL = "<s>[INST]", "[/INST]</s>[INST]"
     system_prompt = """Answer the question in your own words as truthfully as possible from the context given to you.
 Supply sufficient information, evidence, reasoning, source from the context, etc., to justify your answer with details and logic.
 Think step by step and do not jump to conclusion during your reasoning at the beginning.
@@ -213,8 +221,13 @@
     entry = local_model_names.index(r_llm)
     if local_model_tuples[entry][4] == 'llama':
         template = B_INST + B_SYS_LLAMA + system_prompt + E_SYS_LLAMA + instruction + E_INST
-    else:
+    elif local_model_tuples[entry][4] == 'mistral':
         template = B_SYS_MIS + B_INST + system_prompt + E_INST + E_SYS_MIS + B_INST + instruction + E_INST
+    elif local_model_tuples[entry][4] == 'mixtral':
+        template = B_SYS_MIXTRAL + system_prompt + E_SYS_MIXTRAL + B_INST + instruction + E_INST
+    else:
+        # Handle other models or raise an exception
+        pass
     prompt = PromptTemplate(
         input_variables=["context", "chat_history", "question"], template=template
     )
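
Putting the new pieces together, a small sketch of the prompt string the 'mixtral' branch builds. The instruction text here is only a stand-in; in app.py it is composed earlier in setup_prompt and must contain the context, chat_history, and question placeholders:

# Illustrative only: render the Mixtral-style template added in this commit.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS_MIXTRAL, E_SYS_MIXTRAL = "<s>[INST]", "[/INST]</s>[INST]"

system_prompt = "Answer the question in your own words as truthfully as possible from the context given to you."
instruction = "Context: {context}\nChat history: {chat_history}\nQuestion: {question}"   # placeholder text

template = B_SYS_MIXTRAL + system_prompt + E_SYS_MIXTRAL + B_INST + instruction + E_INST
print(template)
# <s>[INST]Answer the question ...[/INST]</s>[INST]Context: {context} ... [/INST]

As in the diff, the resulting template string is then handed to PromptTemplate with input_variables=["context", "chat_history", "question"].
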
 