gorkemgoknar committed
Commit
43356c3
1 Parent(s): cd11c8a

Update app.py

Files changed (1)
  app.py +3 -3
app.py CHANGED
@@ -156,7 +156,7 @@ from llama_cpp import Llama
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
 GPU_LAYERS=int(os.environ.get("GPU_LAYERS",35))
 
-LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>"]
+LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>","[/INST]"]
 
 LLAMA_VERBOSE=False
 print("Running LLM Mistral as InferenceClient")
@@ -283,7 +283,7 @@ def generate_local(
     output = ""
     for response in stream:
         character = response.token.text
-        if "<|user|>" in character:
+        if character in LLM_STOP_WORDS:
             # end of context
             return
 
@@ -304,7 +304,7 @@ def generate_local(
     for response in stream:
         character= response["choices"][0]["text"]
 
-        if "<|user|>" in character:
+        if character in LLM_STOP_WORDS:
             # end of context
             return
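As context for the change (not part of the commit): the pre-commit check `if "<|user|>" in character:` only detected that one substring, while the post-commit `if character in LLM_STOP_WORDS:` is an exact membership test against every stop word, including the newly added "[/INST]". Below is a minimal sketch of that behavior; the stream_until_stop helper and the sample token list are hypothetical, not from app.py.

# Minimal illustrative sketch; stream_until_stop and the sample tokens
# are hypothetical, not taken from app.py.
LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>", "<EOT>", "[/INST]"]

def stream_until_stop(tokens):
    """Accumulate streamed tokens, stopping on an exact stop-word match."""
    output = ""
    for character in tokens:
        # Post-commit behavior: exact membership test against the full list,
        # so "[/INST]" now ends generation just like "<|user|>" does.
        if character in LLM_STOP_WORDS:
            return  # end of context
        output += character
        yield output

# The stream ends as soon as "[/INST]" arrives; "ignored" is never yielded.
for partial in stream_until_stop(["Hello", " world", "[/INST]", "ignored"]):
    print(partial)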