# oo-preview-gpt4-200k / config.yml
# Last change: "Update config.yml" (commit 1de5ce5) by winglian
---
model_url: Open-Orca/OpenOrca_Preview1-200k-GPT4_LLaMA-13B
typer:
delay: 0.1
runpod:
endpoint_id: 1c6291362ubuqc
prefer_async: true
llm:
top_k: 40
top_p: 0.9
temperature: 0.8
  repetition_penalty: null  # unset — backend default applies
  last_n_tokens: null  # unset — backend default applies
seed: -1
batch_size: 8
threads: -1
stop:
- "</s>"
queue:
max_size: 16
  concurrency_count: 3 # recommend setting this no larger than your current RunPod endpoint's maximum number of concurrent workers