pabloce committed on
Commit
5c89384
·
verified ·
1 Parent(s): 0824852

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -50,10 +50,12 @@ def respond(
50
  llm = Llama(
51
  model_path=f"models/{model}",
52
  flash_attn=True,
 
53
  n_gpu_layers=81,
 
54
  n_ctx=8192,
55
  )
56
- provider = LlamaCppPythonProvider(llm, temperature, top_k, top_p, max_tokens, repeat_penalty)
57
 
58
  agent = LlamaCppAgent(
59
  provider,
@@ -63,7 +65,11 @@ def respond(
63
  )
64
 
65
  settings = provider.get_provider_default_settings()
 
 
 
66
  settings.max_tokens = max_tokens
 
67
  settings.stream = True
68
 
69
  messages = BasicChatHistory()
 
50
  llm = Llama(
51
  model_path=f"models/{model}",
52
  flash_attn=True,
53
+ n_threads=4,
54
  n_gpu_layers=81,
55
+ n_batch=1024,
56
  n_ctx=8192,
57
  )
58
+ provider = LlamaCppPythonProvider(llm)
59
 
60
  agent = LlamaCppAgent(
61
  provider,
 
65
  )
66
 
67
  settings = provider.get_provider_default_settings()
68
+ settings.temperature = temperature
69
+ settings.top_k = top_k
70
+ settings.top_p = top_p
71
  settings.max_tokens = max_tokens
72
+ settings.repeat_penalty = repeat_penalty
73
  settings.stream = True
74
 
75
  messages = BasicChatHistory()