eliujl committed
Commit 11c3099
1 Parent(s): ac1251c

Updated local LLM support

Added Mixtral model support. Corrected local LLM model_path.
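As context for the path fix in the diff below: the GGUF file is now expected under a per-repo directory and, if missing, downloaded straight into that directory. The following is a minimal standalone sketch of that flow, using the Mistral entry from local_model_tuples; the local_llm_path value here is an illustrative placeholder, not something from the commit.

import os
from huggingface_hub import hf_hub_download

# Repo id and file name taken from the Mistral entry in local_model_tuples
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
model_file = "mistral-7b-instruct-v0.1.Q8_0.gguf"
local_llm_path = "./models"  # placeholder download root, not from the commit

# Where the app expects the file: <local_llm_path>/<repo id>/<gguf file>
model_path = os.path.normpath(os.path.join(local_llm_path, model_name, model_file))
model_dir = os.path.normpath(os.path.join(local_llm_path, model_name))

if not os.path.exists(model_path):
    # Download into the per-repo directory so the file lands exactly at the
    # model_path checked above (the "corrected model_path").
    model_path = hf_hub_download(
        repo_id=model_name,
        filename=model_file,
        repo_type="model",
        local_dir=model_dir,
        local_dir_use_symlinks=False,
    )
print("using model at", model_path)

Writing the real file with local_dir_use_symlinks=False keeps the GGUF under the app's own directory rather than as a symlink into the Hugging Face cache, so the os.path.exists check keeps working on later runs.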
app.py CHANGED
@@ -30,6 +30,8 @@ local_model_tuples = [
     (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
     (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+    (5, 'mixtral', "TheBloke/Mixtral-8x7B-v0.1-GGUF", "mixtral-8x7b-v0.1.Q8_0.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF"),
+    (6, 'mixtral_inst', "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF", "mixtral-8x7b-instruct-v0.1.Q2_K.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"),
 ]
 local_model_names = [t[1] for t in local_model_tuples]
 langchain.verbose = False
@@ -162,28 +164,33 @@ def use_local_llm(r_llm, local_llm_path):
     model_id, local_model_name, model_name, model_file, model_type, model_link = local_model_tuples[entry]
     model_path = os.path.join( local_llm_path, model_name, model_file )
     model_path = os.path.normpath( model_path )
+    model_dir = os.path.join( local_llm_path, model_name )
+    model_dir = os.path.normpath( model_dir )
     if not os.path.exists(model_path):
         print("model not existing at ", model_path, "\n")
         model_path = hf_hub_download(repo_id=model_name, filename=model_file, repo_type="model",
                                      #cache_dir=local_llm_path,
-                                     local_dir=local_llm_path,
+                                     #local_dir=local_llm_path,
+                                     local_dir=model_dir,
+                                     local_dir_use_symlinks=False)
         print("\n model downloaded at path=",model_path)
     else:
         print("model existing at ", model_path)
 
     llm = LlamaCpp(
         model_path=model_path,
-        temperature=0.0,
-        n_batch=300,
+        # temperature=0.0,
+        # n_batch=300,
         n_ctx=4000,
         max_tokens=2000,
-        n_gpu_layers=10,
-        n_threads=12,
-        top_p=1,
-        repeat_penalty=1.15,
-        verbose=False,
-        callback_manager=callback_manager,
-        streaming=True,
+        # n_gpu_layers=10,
+        # n_threads=12,
+        # top_p=1,
+        # repeat_penalty=1.15,
+        # verbose=False,
+        # callback_manager=callback_manager,
+        # streaming=True,
+        # chat_format="llama-2",
         # verbose=True, # Verbose is required to pass to the callback manager
     )
     return llm
@@ -193,6 +200,7 @@ def setup_prompt(r_llm):
     B_INST, E_INST = "[INST]", "[/INST]"
     B_SYS_LLAMA, E_SYS_LLAMA = "<<SYS>>\n", "\n<</SYS>>\n\n"
     B_SYS_MIS, E_SYS_MIS = "<s> ", "</s> "
+    B_SYS_MIXTRAL, E_SYS_MIXTRAL = "<s>[INST]", "[/INST]</s>[INST]"
     system_prompt = """Answer the question in your own words as truthfully as possible from the context given to you.
 Supply sufficient information, evidence, reasoning, source from the context, etc., to justify your answer with details and logic.
 Think step by step and do not jump to conclusion during your reasoning at the beginning.
@@ -213,8 +221,13 @@ def setup_prompt(r_llm):
     entry = local_model_names.index(r_llm)
     if local_model_tuples[entry][4] == 'llama':
         template = B_INST + B_SYS_LLAMA + system_prompt + E_SYS_LLAMA + instruction + E_INST
-
+    elif local_model_tuples[entry][4] == 'mistral':
         template = B_SYS_MIS + B_INST + system_prompt + E_INST + E_SYS_MIS + B_INST + instruction + E_INST
+    elif local_model_tuples[entry][4] == 'mixtral':
+        template = B_SYS_MIXTRAL + system_prompt + E_SYS_MIXTRAL + B_INST + instruction + E_INST
+    else:
+        # Handle other models or raise an exception
+        pass
     prompt = PromptTemplate(
         input_variables=["context", "chat_history", "question"], template=template
     )
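To illustrate the new Mixtral branch in setup_prompt, here is a rough, self-contained sketch of how the template is assembled and handed to LangChain's PromptTemplate. The instruction string is a stand-in (the app builds its own instruction carrying the {context}, {chat_history} and {question} placeholders), and model_type stands in for local_model_tuples[entry][4].

from langchain.prompts import PromptTemplate

# Markers copied from the diff above
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS_MIXTRAL, E_SYS_MIXTRAL = "<s>[INST]", "[/INST]</s>[INST]"

system_prompt = "Answer the question in your own words as truthfully as possible from the context given to you."
# Stand-in instruction; the app's real instruction also carries these placeholders.
instruction = "Context: {context}\nChat history: {chat_history}\nQuestion: {question}"

model_type = "mixtral"  # in the app this comes from local_model_tuples[entry][4]
if model_type == "mixtral":
    template = B_SYS_MIXTRAL + system_prompt + E_SYS_MIXTRAL + B_INST + instruction + E_INST
else:
    template = B_INST + system_prompt + instruction + E_INST  # fallback for this sketch only

prompt = PromptTemplate(
    input_variables=["context", "chat_history", "question"], template=template
)
print(prompt.format(context="...", chat_history="...", question="What changed?"))

Printing the formatted prompt makes it easy to eyeball whether the <s>[INST] ... [/INST] wrapping matches what the chosen GGUF model expects before wiring it into the chain.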
|