---
# Application configuration: model source, RunPod endpoint settings,
# LLM sampling parameters, and request-queue limits.

# Model repository/identifier to serve (Hugging Face-style path).
model_url: Open-Orca/OpenOrca_Preview1-200k-GPT4_LLaMA-13B

typer:
  # Delay in seconds — presumably the per-character typing-effect delay;
  # confirm against the consumer of this key.
  delay: 0.1

runpod:
  # Quoted defensively: alphanumeric IDs that happen to look numeric can be
  # re-typed by YAML parsers; quoting pins the string type.
  endpoint_id: "1c6291362ubuqc"
  prefer_async: true

llm:
  top_k: 40
  top_p: 0.9
  temperature: 0.8
  # Explicit null instead of a bare empty value (both parse to null, but
  # bare keys are ambiguous — yamllint `empty-values`). null presumably
  # means "use the backend's default" — confirm with the LLM runtime.
  repetition_penalty: null
  last_n_tokens: null
  # NOTE(review): -1 conventionally means "random seed" / "auto thread
  # count" in llama-style runtimes — confirm against the backend's docs.
  seed: -1
  batch_size: 8
  threads: -1
  # Stop sequences; the single empty-string entry is preserved exactly
  # from the original config.
  stop:
    - ""

queue:
  max_size: 16
  # NOTE(review): the original inline comment was truncated ("recommend
  # setting this no larger than your current"); presumably it advised
  # keeping this at or below the endpoint's concurrent-worker limit —
  # confirm against upstream documentation.
  concurrency_count: 3