gorkemgoknar committed
Commit
331538a
1 Parent(s): a51d57b

Update app.py

Files changed (1): app.py (+5, -6)
app.py CHANGED
@@ -158,11 +158,11 @@ from llama_cpp import Llama
 # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
 # else 35 full layers + XTTS works fine on T4 16GB
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
-GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 30))
+GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 25))
 
 LLAMA_VERBOSE=False
 print("Running LLM Mistral")
-llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS+10,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 print("Running LLM Zephyr")
 llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
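Net effect of this hunk: the shared default drops from 30 to 25 offloaded layers, and the two models no longer get the same count. Mistral now receives GPU_LAYERS+10 layers on the GPU while Zephyr keeps the base value. A minimal sketch of the resulting split, assuming the GPU_LAYERS environment variable is left unset:

import os

# Default changed from 30 to 25 in this commit; override per the comments above,
# e.g. GPU_LAYERS=15 on an 8GB card so both models still fit.
GPU_LAYERS = int(os.environ.get("GPU_LAYERS", 25))

mistral_layers = GPU_LAYERS + 10  # 35: full offload per the "35 full layers" comment
zephyr_layers = GPU_LAYERS        # 25: partial offload, leaves headroom for XTTS

print(mistral_layers, zephyr_layers)  # -> 35 25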
@@ -191,19 +191,18 @@ def format_prompt_mistral(message, history, system_message=system_message,system
 # Zephyr formatter
 def format_prompt_zephyr(message, history, system_message=system_message):
     prompt = (
-        "<|system|>\n" + system_message + "\n</s>"
+        "<|system|>\n" + system_message + "</s>"
     )
     for user_prompt, bot_response in history:
-        prompt += f"<|user|>\n{user_prompt} </s>"
+        prompt += f"<|user|>\n{user_prompt}</s>"
         prompt += f"<|assistant|>\n{bot_response}</s>"
     if message=="":
         message="Hello"
     prompt += f"<|user|>\n{message}</s>"
-    prompt += f"<|assistant|>\n"
+    prompt += f"<|assistant|>"
     print(prompt)
     return prompt
 
-
 def generate_local(
     prompt,
     history,
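This hunk tightens the Zephyr prompt format: the end-of-sequence token </s> now directly follows the system message and each user turn (no stray "\n" or space before it), and the closing <|assistant|> tag loses its trailing newline, so generation starts immediately after the tag. Assuming a system message of "You are a helpful assistant.", a single ("Hi", "Hello!") turn in history, and a new message "How are you?", the updated formatter would print:

<|system|>
You are a helpful assistant.</s><|user|>
Hi</s><|assistant|>
Hello!</s><|user|>
How are you?</s><|assistant|>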
 