# Configuration for the vector store, the embeddings model, and the local LLM.
# Each key must be on its own line: in TOML a `#` comments out the rest of the
# line, so several `key = value # comment` pairs on one line collapse into a
# single key followed by one long comment.

# Path to the vector database.
vector_db_path = "./chromadb"

# Embeddings model used to generate vectors.
embeddings_model = "BAAI/bge-small-en"

# Path to the LLM model file.
llm_path = "./mistral-7b-openorca.Q5_K_M.gguf"

# Device to use for the LLM model: "cuda" or "cpu".
device = "cpu"

# Change this value based on your model and your GPU VRAM pool.
# Set to 0 if you are using a CPU.
n_gpu_layers = 0

# Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
# NOTE(review): n_ctx presumably corresponds to context_length below -- confirm
# against the code that loads this file.
n_batch = 256

# Length of the context to use for the LLM model.
context_length = 8000

# Sampling temperature to use for the LLM model.
temperature = 0.0

# top_p value to use for the LLM model.
top_p = 1.0

# Maximum number of tokens to generate from the LLM model.
max_tokens = 2000