gorkemgoknar committed · Commit 5a7a07c · Parent(s): ae3c32a

Update app.py
app.py CHANGED
@@ -160,6 +160,8 @@ from llama_cpp import Llama
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
 GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 35))
 
+LLM_STOP_WORDS= ["</s>","<|user|>","/s>"]
+
 LLAMA_VERBOSE=False
 print("Running LLM Mistral")
 llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
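For context: the new LLM_STOP_WORDS list is the value that generate_local later passes as stop= (see a later hunk), telling llama.cpp to cut a completion as soon as any of those strings appears. A minimal, self-contained sketch of that pattern with llama-cpp-python; the model path is a placeholder, not the Space's actual file:

    import os
    from llama_cpp import Llama

    LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>"]        # strings that end generation early
    GPU_LAYERS = int(os.environ.get("GPU_LAYERS", 35))  # transformer layers offloaded to GPU

    # "./mistral.gguf" is a placeholder path for illustration only.
    llm = Llama(model_path="./mistral.gguf", n_gpu_layers=GPU_LAYERS, n_ctx=4096, verbose=False)

    # stop= truncates the completion at the first occurrence of any stop word.
    out = llm("[INST] Say hi [/INST]", max_tokens=64, stop=LLM_STOP_WORDS)
    print(out["choices"][0]["text"])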
@@ -176,8 +178,9 @@ def format_prompt_mistral(message, history, system_message=system_message,system
     for user_prompt, bot_response in history:
         prompt += f"[INST] {user_prompt} [/INST]"
         prompt += f" {bot_response}</s> "
-
-
+
+    #if message=="":
+    # message="Hello"
     prompt += f"[INST] {message} [/INST]"
     return prompt
 
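As a reading aid, here is the prompt shape this function builds, in a trimmed sketch (the Space's format_prompt_mistral also injects a system message, omitted here):

    def format_prompt(message, history):
        # Mistral instruct format: each past turn becomes "[INST] user [/INST] reply</s>",
        # then the new user message is appended as a final open [INST] block.
        prompt = ""
        for user_prompt, bot_response in history:
            prompt += f"[INST] {user_prompt} [/INST]"
            prompt += f" {bot_response}</s> "
        prompt += f"[INST] {message} [/INST]"
        return prompt

    print(format_prompt("How are you?", [("Hi", "Hello!")]))
    # [INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]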
@@ -211,7 +214,7 @@ def generate_local(
     temperature=0.8,
     max_tokens=256,
     top_p=0.95,
-    stop =
+    stop = LLM_STOP_WORDS
 ):
     temperature = float(temperature)
     if temperature < 1e-2:
@@ -236,6 +239,7 @@ def generate_local(
 
 
     try:
+        print("LLM Input:", formatted_prompt)
        stream = llm(
            formatted_prompt,
            **generate_kwargs,
@@ -254,7 +258,7 @@ def generate_local(
                 return
 
 
-            output += response["choices"][0]["text"].replace("<|assistant|>","").replace("<|user|>","")
+            output += response["choices"][0]["text"].replace("<|assistant|>","").replace("<|user|>","")
             yield output
 
     except Exception as e:
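For orientation, a simplified sketch of the streaming loop the generate_local hunks above sit in; generate_kwargs and the Space's surrounding logic are collapsed, and all names besides the llama-cpp-python calls are illustrative:

    def generate_local(llm, formatted_prompt, stop=("</s>", "<|user|>", "/s>")):
        # Stream tokens from llama-cpp-python and yield the growing text,
        # scrubbing chat-template markers the model sometimes echoes.
        output = ""
        try:
            stream = llm(formatted_prompt, max_tokens=256, stop=list(stop), stream=True)
            for response in stream:
                output += response["choices"][0]["text"].replace("<|assistant|>", "").replace("<|user|>", "")
                yield output
        except Exception as e:
            print("Unhandled Exception:", e)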
@@ -464,7 +468,7 @@ def get_sentence(history, chatbot_role,llm_model,system_prompt=""):
         history[-1][1] = character.replace("<|assistant|>","")
         # It is coming word by word
 
-        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").strip())
+        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())
         if len(text_to_generate) > 1:
 
             dif = len(text_to_generate) - len(sentence_list)
@@ -509,7 +513,7 @@ def get_sentence(history, chatbot_role,llm_model,system_prompt=""):
 
     # return that final sentence token
     try:
-        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").strip())[-1]
+        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())[-1]
         sentence_hash = hash(last_sentence)
         if sentence_hash not in sentence_hash_list:
             if stored_sentence is not None and stored_sentence_hash is not None:
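Both get_sentence hunks feed the same idea: as tokens stream in, the partial reply is re-split into sentences with NLTK, and each newly completed sentence is sent to TTS exactly once. A minimal sketch under those assumptions (requires nltk with its punkt data; speak() is a stand-in for the Space's XTTS call):

    import nltk  # assumes nltk.download("punkt") has been run

    def stream_sentences(partial_texts, speak):
        # Re-tokenize the growing reply on each update; a sentence counts as done
        # once a later sentence has started after it, so nothing is spoken twice.
        spoken_hashes = set()
        for text in partial_texts:
            sentences = nltk.sent_tokenize(text.replace("\n", " ").strip())
            for sentence in sentences[:-1]:  # the last sentence may still be growing
                h = hash(sentence)
                if h not in spoken_hashes:
                    spoken_hashes.add(h)
                    speak(sentence)

    stream_sentences(iter(["Hi the", "Hi there. How", "Hi there. How are you?"]), print)

The still-open final sentence is flushed separately once generation ends, which is what the "return that final sentence token" path in the last hunk handles.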