SiraH committed on
Commit
3f1433f
1 Parent(s): 6305f5c

reduce n_ctx

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -168,14 +168,14 @@ def split_docs(documents,chunk_size=1000):
168
  def load_llama2_llamaCpp():
169
  core_model_name = "llama-2-7b-chat.Q4_0.gguf"
170
  #n_gpu_layers = 32
171
- n_batch = 8
172
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
173
  llm = LlamaCpp(
174
  model_path=core_model_name,
175
  #n_gpu_layers=n_gpu_layers,
176
  n_batch=n_batch,
177
  callback_manager=callback_manager,
178
- verbose=True,n_ctx = 4096, temperature = 0.1, max_tokens = 256
179
  )
180
  return llm
181
 
 
168
  def load_llama2_llamaCpp():
169
  core_model_name = "llama-2-7b-chat.Q4_0.gguf"
170
  #n_gpu_layers = 32
171
+ n_batch = 32
172
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
173
  llm = LlamaCpp(
174
  model_path=core_model_name,
175
  #n_gpu_layers=n_gpu_layers,
176
  n_batch=n_batch,
177
  callback_manager=callback_manager,
178
+ verbose=True,n_ctx = 1024, temperature = 0.1, max_tokens = 256
179
  )
180
  return llm
181