crystalkalem committed on
Commit
3725f8e
1 Parent(s): 0ecd162

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -11,14 +11,14 @@ chat_template = os.getenv('CHAT_TEMPLATE')
11
  # Interface variables
12
  model_name = model_id.split('/')[1].split('-GGUF')[0]
13
  title = f"{model_name}"
14
- description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})! Responce Time takes between 50 and 150 seconds, its not great."
15
 
16
  # Initialize the LLM
17
  llm = Llama(model_path="model.gguf",
18
- n_ctx=32000,
19
- n_threads=4,
20
  temp = 0.75,
21
- n_vocab=16000,
22
  n_gpu_layers=-1,
23
  chat_format=chat_template)
24
 
 
11
  # Interface variables
12
  model_name = model_id.split('/')[1].split('-GGUF')[0]
13
  title = f"{model_name}"
14
+ description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})! Context length = 4096, new token limit = 1024. Responce Time takes between 50 and 150 seconds, its not great."
15
 
16
  # Initialize the LLM
17
  llm = Llama(model_path="model.gguf",
18
+ n_ctx=4096,
19
+ n_threads=2,
20
  temp = 0.75,
21
+ n_vocab=1024,
22
  n_gpu_layers=-1,
23
  chat_format=chat_template)
24