ggml-runpod-ui / config.yml
---
model_url: https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg
typer:
  delay: 0.1
runpod:
  endpoint_id: u6tv84bpomhfei
  prefer_async: true
llm:
  max_tokens: 600
  top_k:
  top_p:
  temperature:
  repetition_penalty:
  last_n_tokens:
  seed: -1
  batch_size: 8
  threads: -1
  stop:
    - "</s>"
queue:
  max_size: 16
  concurrency_count: 1 # recommend setting this no larger than your current
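A minimal sketch of how an app could read this file with PyYAML, assuming the config.yml filename from the header above; the key names mirror the config, but the loader itself is an illustration, not the Space's actual code.

import yaml

# Load the YAML document; keys left blank (top_k, top_p, temperature, ...) parse as None.
with open("config.yml") as f:
    config = yaml.safe_load(f)

runpod_endpoint = config["runpod"]["endpoint_id"]   # "u6tv84bpomhfei"
prefer_async = config["runpod"]["prefer_async"]     # True
max_tokens = config["llm"]["max_tokens"]            # 600
stop_sequences = config["llm"]["stop"]              # ["</s>"]

# Blank sampling values come back as None, so a caller would substitute its own
# defaults; the fallback values below are illustrative, not taken from the repo.
temperature = config["llm"]["temperature"] or 0.7
top_p = config["llm"]["top_p"] or 0.95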