---
# Application configuration: model source, RunPod endpoint settings,
# LLM sampling parameters, and request-queue limits.

# Model repository/identifier to serve (Hugging Face-style path).
model_url: Open-Orca/OpenOrca_Preview1-200k-GPT4_LLaMA-13B

typer:
  # Delay in seconds — presumably the per-character typing-effect delay;
  # confirm against the consumer of this key.
  delay: 0.1

runpod:
  # Quoted defensively: alphanumeric IDs that happen to look numeric can be
  # re-typed by YAML parsers; quoting pins the string type.
  endpoint_id: "1c6291362ubuqc"
  prefer_async: true

llm:
  top_k: 40
  top_p: 0.9
  temperature: 0.8
  # Explicit null instead of a bare empty value (both parse to null, but
  # bare keys are ambiguous — yamllint `empty-values`). null presumably
  # means "use the backend's default" — confirm with the LLM runtime.
  repetition_penalty: null
  last_n_tokens: null
  # NOTE(review): -1 conventionally means "random seed" / "auto thread
  # count" in llama-style runtimes — confirm against the backend's docs.
  seed: -1
  batch_size: 8
  threads: -1
  # Stop sequences; the single empty-string entry is preserved exactly
  # from the original config.
  stop:
    - ""

queue:
  max_size: 16
  # NOTE(review): the original inline comment was truncated ("recommend
  # setting this no larger than your current"); presumably it advised
  # keeping this at or below the endpoint's concurrent-worker limit —
  # confirm against upstream documentation.
  concurrency_count: 3