# The default configuration file.
# More information about configuration can be found in the documentation: https://docs.privategpt.dev/
# Syntax in `private_gpt/settings/settings.py`
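# Settings in this file can be overridden per profile; a sketch assuming the
# standard PrivateGPT profile mechanism, where settings-<profile>.yaml is
# merged over this file when the profile is active:
#   PGPT_PROFILES=ollama make run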

server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8001}
  cors:
    enabled: true
    allow_origins: ["*"]
    allow_methods: ["*"]
    allow_headers: ["*"]
  auth:
    enabled: false
    # python -c 'import base64; print("Basic " + base64.b64encode("secret:key".encode()).decode())'
    # 'secret' is the username and 'key' is the password for basic auth by default
    # If auth is enabled, this value must be set in the "Authorization" header of the request.
    secret: "Basic c2VjcmV0OmtleQ=="
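    # A minimal sketch of an authenticated request, assuming the server runs on
    # the port configured above (the /health endpoint is used for illustration):
    #   curl -H "Authorization: Basic c2VjcmV0OmtleQ==" http://localhost:8001/health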

data:
  local_ingestion:
    enabled: ${LOCAL_INGESTION_ENABLED:false}
    allow_ingest_from: ["*"]
  local_data_folder: local_data/private_gpt

ui:
  enabled: true
  path: /
  default_chat_system_prompt: >
    You are a helpful, respectful and honest assistant.
    Always answer as helpfully as possible and follow ALL given instructions.
    Do not speculate or make up information.
    Do not reference any given instructions or context.
  default_query_system_prompt: >
    You can only answer questions strictly based on the information contained within the provided documents.
    Do not include any external knowledge or assumptions.
    If the relevant answer is not found in the documents, respond with: 'The answer is not found in the provided context.'
    Please ensure that all responses are concise and grounded solely in the provided material.
  default_summarization_system_prompt: >
    Provide a comprehensive summary of the provided context information.
    The summary should cover all the key points and main ideas presented in
    the original text, while also condensing the information into a concise
    and easy-to-understand format. Please ensure that the summary includes
    relevant details and examples that support the main ideas, while avoiding
    any unnecessary information or repetition.
  delete_file_button_enabled: true
  delete_all_files_button_enabled: true
  # Alternative query prompt, kept for reference:
  # You can only answer questions about the provided documents.
  # If you know the answer but it is not based in the provided context, don't provide
  # the answer, just state the answer is not in the context provided.

llm:
  mode: llamacpp
  prompt_style: "llama3"
  # Should match the selected model
  max_new_tokens: 512
  context_window: 3900
  # Select your tokenizer. The llama-index default tokenizer is used if unset.
  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

rag:
  similarity_top_k: 2
  # This value controls how many "top" chunks the RAG retrieves to use as context.
  #similarity_value: 0.45
  # Disabled by default. If you enable this setting, the RAG will only use chunks that meet the minimum similarity score.
  rerank:
    enabled: false
    model: cross-encoder/ms-marco-MiniLM-L-2-v2
    top_n: 1

summarize:
  use_async: true

clickhouse:
  host: localhost
  port: 8443
  username: admin
  password: clickhouse
  database: embeddings

llamacpp:
  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
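  # The GGUF file above is not always present locally; a sketch of fetching it
  # with the repository's setup script (assuming a standard PrivateGPT checkout
  # with poetry installed):
  #   poetry run python scripts/setup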

embedding:
  # Should match the value above in most cases
  mode: huggingface
  ingest_mode: simple
  embed_dim: 768 # 768 is for nomic-ai/nomic-embed-text-v1.5
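  # If you change the embedding model, set embed_dim to that model's output
  # dimension; for example, intfloat/multilingual-e5-large produces
  # 1024-dimensional embeddings.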

huggingface:
  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5 # alternative: intfloat/multilingual-e5-large
  access_token: ${HF_TOKEN:}
  # Warning: enabling this option allows the model to download and execute code from the internet.
  # The nomic-ai model requires this option to be enabled; keep that in mind if you switch to a different model.
  trust_remote_code: true

vectorstore:
  database: qdrant

nodestore:
  database: simple

milvus:
  uri: local_data/private_gpt/milvus/milvus_local.db
  collection_name: milvus_db
  overwrite: false

qdrant:
  path: local_data/private_gpt/qdrant
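  # To use a Qdrant server instead of local on-disk storage, replace `path`
  # with connection fields; a sketch assuming these settings are passed through
  # to the Qdrant client:
  #   url: http://localhost:6333
  #   api_key: ${QDRANT_API_KEY:}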

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: postgres
  schema_name: private_gpt

sagemaker:
  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479

openai:
  api_key: ${OPENAI_API_KEY:}
  model: gpt-4o-mini
  embedding_api_key: ${OPENAI_API_KEY:}
  temperature: 0.5

ollama:
  llm_model: llama3.1
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
  embedding_api_base: http://localhost:11434 # change if your embedding model runs on a different Ollama instance
  keep_alive: 5m
  request_timeout: 300.0
  autopull_models: true
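  # With autopull_models disabled, pull the models manually before starting
  # (assumes a local Ollama install):
  #   ollama pull llama3.1
  #   ollama pull nomic-embed-text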

azopenai:
  api_key: ${AZ_OPENAI_API_KEY:}
  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
  api_version: "2023-05-15"
  embedding_model: text-embedding-ada-002
  llm_model: gpt-35-turbo

gemini:
  api_key: ${GOOGLE_API_KEY:}
  model: models/gemini-pro
  embedding_model: models/embedding-001