---
hub:
  repo_id: RoversX/llama-2-7b-chat-hf-Qlora-Samantha-V2-ggml
  filename: ggml-model-q4_0.bin

llama_cpp:
  n_ctx: 2048
  n_gpu_layers: 40 # llama 13b has 40 layers; the 7b model used here has 32, so this simply offloads every layer

chat:
  stop:
    - ""
    - ""
    - "### human:"
    - "human:"

queue:
  max_size: 16
  concurrency_count: 1 # leave this at 1, llama-cpp-python doesn't handle concurrent requests and will crash the entire app
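
# A rough sketch of how these values are presumably consumed by the app,
# assuming the usual llama-cpp-python + Gradio pattern; `cfg` (the parsed
# YAML dict) and `demo` (the Gradio Blocks object) are assumptions, not part
# of this config:
#
#   from huggingface_hub import hf_hub_download
#   from llama_cpp import Llama
#
#   model_path = hf_hub_download(repo_id=cfg["hub"]["repo_id"],
#                                filename=cfg["hub"]["filename"])
#   llm = Llama(model_path=model_path,
#               n_ctx=cfg["llama_cpp"]["n_ctx"],
#               n_gpu_layers=cfg["llama_cpp"]["n_gpu_layers"])
#   demo.queue(max_size=cfg["queue"]["max_size"],
#              concurrency_count=cfg["queue"]["concurrency_count"])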