gorkemgoknar committed
Commit cd11c8a
1 Parent(s): 0207d09

Update app.py

Files changed (1):
  1. app.py +3 -3

app.py CHANGED
@@ -154,7 +154,7 @@ from llama_cpp import Llama
 # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
 # else 35 full layers + XTTS works fine on T4 16GB
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
-GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 5))
+GPU_LAYERS=int(os.environ.get("GPU_LAYERS",35))
 
 LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>"]
 
@@ -165,10 +165,10 @@ llm_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
 print("Running LLM Zephyr")
-llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=round(GPU_LAYERS/2),max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 print("Running Yi LLM")
-llm_yi = Llama(model_path=yi_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+llm_yi = Llama(model_path=yi_model_path,n_gpu_layers=round(GPU_LAYERS/2),max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 
 # Mistral formatter
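
For reference, the arithmetic behind this change: the default layer budget rises from 5 to 35, and each Llama instance now receives half of it via round(GPU_LAYERS/2). A minimal sketch of that split (the split_gpu_layers helper is illustrative, not part of the commit):

import os

def split_gpu_layers(total_layers: int, n_models: int = 2) -> int:
    # Mirrors the commit's round(GPU_LAYERS/2): each model gets half
    # of the total GPU offload budget. Illustrative helper, not in app.py.
    return round(total_layers / n_models)

# Same env override as app.py; default raised from 5 to 35 because, per
# the in-file comments, ~5 GB per LLM + ~4 GB for XTTS fits a 16 GB T4.
GPU_LAYERS = int(os.environ.get("GPU_LAYERS", 35))

print(split_gpu_layers(GPU_LAYERS))  # 18 layers per model with the default
print(split_gpu_layers(15))          # 8 per model on an 8 GB GPU

With the old default of 5, both models together offloaded only a handful of layers; splitting a full 35-layer budget keeps each model's share within the VRAM estimate given in the comments.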