gorkemgoknar committed
Commit 5a7a07c
1 Parent(s): ae3c32a

Update app.py

Files changed (1):
  1. app.py +10 -6
app.py CHANGED
@@ -160,6 +160,8 @@ from llama_cpp import Llama
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
 GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 35))
 
+LLM_STOP_WORDS= ["</s>","<|user|>","/s>"]
+
 LLAMA_VERBOSE=False
 print("Running LLM Mistral")
 llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
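The new LLM_STOP_WORDS constant gathers the stop sequences in one place; the extra "/s>" entry also catches a closing tag emitted without its leading "<". A minimal sketch of how llama_cpp honors these stop strings, with the model path as a placeholder rather than the one this Space uses:

# Minimal sketch: llama_cpp halts generation at the first matching stop string.
# The model path below is a placeholder, not the Space's actual model file.
from llama_cpp import Llama

LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>"]

llm = Llama(model_path="./mistral-7b-instruct.Q5_K_M.gguf", n_ctx=4096)
result = llm(
    "[INST] Say hello in one short sentence. [/INST]",
    max_tokens=64,
    stop=LLM_STOP_WORDS,  # generation ends as soon as any stop string appears
)
print(result["choices"][0]["text"])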
@@ -176,8 +178,9 @@ def format_prompt_mistral(message, history, system_message=system_message,system
     for user_prompt, bot_response in history:
         prompt += f"[INST] {user_prompt} [/INST]"
         prompt += f" {bot_response}</s> "
-    if message=="":
-        message="Hello"
+
+    #if message=="":
+    #    message="Hello"
     prompt += f"[INST] {message} [/INST]"
     return prompt
 
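Commenting out the fallback means an empty message now reaches the prompt as-is instead of being replaced with "Hello". For reference, a self-contained sketch of the [INST]-style turn layout this function builds (the function name and the system-message handling here are assumptions; the hunk only shows the history loop):

# Hypothetical sketch of the Mistral-instruct turn layout.
def build_mistral_prompt(message, history, system_message=""):
    # system-message placement is an assumption, not shown in this hunk
    prompt = f"<s>[INST] {system_message} [/INST]" if system_message else "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt

print(build_mistral_prompt("How are you?", [("Hi", "Hello there!")]))
# <s>[INST] Hi [/INST] Hello there!</s> [INST] How are you? [/INST]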
 
@@ -211,7 +214,7 @@ def generate_local(
     temperature=0.8,
     max_tokens=256,
     top_p=0.95,
-    stop = ["</s>","<|user|>"]
+    stop = LLM_STOP_WORDS
 ):
     temperature = float(temperature)
     if temperature < 1e-2:
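With stop = LLM_STOP_WORDS, every call to generate_local shares the module-level list as its default argument (Python evaluates defaults once). That is safe while the list is only read; a defensive sketch, purely as a style note rather than something this change requires:

# Default arguments are evaluated once: every call shares this list object.
LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>"]

def generate_local(prompt, stop=LLM_STOP_WORDS):
    stop = list(stop)           # copy before any local mutation
    stop.append("<|system|>")   # hypothetical extra stop word
    return stop

print(generate_local("hi"))   # ['</s>', '<|user|>', '/s>', '<|system|>']
print(LLM_STOP_WORDS)         # unchanged: ['</s>', '<|user|>', '/s>']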
@@ -236,6 +239,7 @@ def generate_local(
 
 
     try:
+        print("LLM Input:", formatted_prompt)
         stream = llm(
             formatted_prompt,
             **generate_kwargs,
@@ -254,7 +258,7 @@ def generate_local(
                 return
 
 
-            output += response["choices"][0]["text"].replace("<|assistant|>","").replace("<|user|>","").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","")
+            output += response["choices"][0]["text"].replace("<|assistant|>","").replace("<|user|>","")
            yield output
 
     except Exception as e:
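This hunk trims the per-chunk cleanup to the two tags that actually matter mid-stream; the remaining tag stripping moves into the sentence-tokenizer calls in the hunks below. A hypothetical helper showing the same chained-.replace() cleanup factored over a tag list:

# Hypothetical helper: the chained .replace() cleanup as a loop over a tag list.
STRAY_TAGS = ["<|assistant|>", "<|user|>", "<|ass>", "[/ASST]", "[/ASSI]", "[/ASS]"]

def clean_chunk(text):
    # remove any stray role/closing tags the model may emit mid-stream
    for tag in STRAY_TAGS:
        text = text.replace(tag, "")
    return text

assert clean_chunk("Hello<|user|> world[/ASS]") == "Hello world"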
@@ -464,7 +468,7 @@ def get_sentence(history, chatbot_role,llm_model,system_prompt=""):
         history[-1][1] = character.replace("<|assistant|>","")
         # It is coming word by word
 
-        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").strip())
+        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())
         if len(text_to_generate) > 1:
 
             dif = len(text_to_generate) - len(sentence_list)
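Stripping the stray tags before nltk.sent_tokenize matters here because a fragment like "[/ASS]" can otherwise be split off as its own bogus sentence. A sketch of the incremental sentence-detection pattern this function relies on, assuming the punkt tokenizer data is available:

# Sketch: spot newly completed sentences in text that grows word by word.
import nltk
nltk.download("punkt", quiet=True)  # sent_tokenize needs the punkt data

partial = "Hello there. How are"    # streamed text so far
sentences = nltk.sent_tokenize(partial.replace("\n", " ").strip())
if len(sentences) > 1:
    # every sentence except the last (possibly unfinished) one is final
    ready = sentences[:-1]
    print(ready)                    # ['Hello there.']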
@@ -509,7 +513,7 @@ def get_sentence(history, chatbot_role,llm_model,system_prompt=""):
 
     # return that final sentence token
     try:
-        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").strip())[-1]
+        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())[-1]
         sentence_hash = hash(last_sentence)
         if sentence_hash not in sentence_hash_list:
             if stored_sentence is not None and stored_sentence_hash is not None:
 