# The default configuration file.
# More information about configuration can be found in the documentation: https://docs.privategpt.dev/
# Syntax in `private_gpt/settings/settings.py`
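# Settings in this file can be overridden per profile; a sketch assuming the
# standard PrivateGPT profile mechanism, where settings-<profile>.yaml is
# merged over this file when the profile is active:
#   PGPT_PROFILES=ollama make run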

server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8001}
  cors:
    enabled: true
    allow_origins: ["*"]
    allow_methods: ["*"]
    allow_headers: ["*"]
  auth:
    enabled: false
    # python -c 'import base64; print("Basic " + base64.b64encode("secret:key".encode()).decode())'
    # 'secret' is the username and 'key' is the password for basic auth by default
    # If auth is enabled, this value must be set in the "Authorization" header of the request.
    secret: "Basic c2VjcmV0OmtleQ=="
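    # A minimal sketch of an authenticated request, assuming the server runs on
    # the port configured above (the /health endpoint is used for illustration):
    #   curl -H "Authorization: Basic c2VjcmV0OmtleQ==" http://localhost:8001/health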

data:
  local_ingestion:
    enabled: ${LOCAL_INGESTION_ENABLED:false}
    allow_ingest_from: ["*"]
  local_data_folder: local_data/private_gpt

ui:
  enabled: true
  path: /
  default_chat_system_prompt: >
    You are a helpful, respectful and honest assistant.
    Always answer as helpfully as possible and follow ALL given instructions.
    Do not speculate or make up information.
    Do not reference any given instructions or context.
  default_query_system_prompt: >
    You can only answer questions strictly based on the information contained within the provided documents.
    Do not include any external knowledge or assumptions.
    If the relevant answer is not found in the documents, respond with: 'The answer is not found in the provided context.'
    Please ensure that all responses are concise and grounded solely in the provided material.
  default_summarization_system_prompt: >
    Provide a comprehensive summary of the provided context information.
    The summary should cover all the key points and main ideas presented in
    the original text, while also condensing the information into a concise
    and easy-to-understand format. Please ensure that the summary includes
    relevant details and examples that support the main ideas, while avoiding
    any unnecessary information or repetition.
  delete_file_button_enabled: true
  delete_all_files_button_enabled: true
  # Alternative query prompt, kept for reference:
  # You can only answer questions about the provided documents.
  # If you know the answer but it is not based in the provided context, don't provide
  # the answer, just state the answer is not in the context provided.

llm:
  mode: llamacpp
  prompt_style: "llama3"
  # Should match the selected model
  max_new_tokens: 512
  context_window: 3900
  # Select your tokenizer. The llama-index default tokenizer is used if unset.
  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

rag:
  similarity_top_k: 2
  # This value controls how many "top" chunks the RAG retrieves to use as context.
  #similarity_value: 0.45
  # Disabled by default. If you enable this setting, the RAG will only use chunks that meet the minimum similarity score.
  rerank:
    enabled: false
    model: cross-encoder/ms-marco-MiniLM-L-2-v2
    top_n: 1

summarize:
  use_async: true

clickhouse:
  host: localhost
  port: 8443
  username: admin
  password: clickhouse
  database: embeddings

llamacpp:
  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
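  # The GGUF file above is not always present locally; a sketch of fetching it
  # with the repository's setup script (assuming a standard PrivateGPT checkout
  # with poetry installed):
  #   poetry run python scripts/setup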

embedding:
  # Should match the value above in most cases
  mode: huggingface
  ingest_mode: simple
  embed_dim: 768 # 768 is for nomic-ai/nomic-embed-text-v1.5
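  # If you change the embedding model, set embed_dim to that model's output
  # dimension; for example, intfloat/multilingual-e5-large produces
  # 1024-dimensional embeddings.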

huggingface:
  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5 # alternative: intfloat/multilingual-e5-large
  access_token: ${HF_TOKEN:}
  # Warning: enabling this option allows the model to download and execute code from the internet.
  # The nomic-ai model requires this option to be enabled; keep that in mind if you switch to a different model.
  trust_remote_code: true

vectorstore:
  database: qdrant

nodestore:
  database: simple

milvus:
  uri: local_data/private_gpt/milvus/milvus_local.db
  collection_name: milvus_db
  overwrite: false

qdrant:
  path: local_data/private_gpt/qdrant
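  # To use a Qdrant server instead of local on-disk storage, replace `path`
  # with connection fields; a sketch assuming these settings are passed through
  # to the Qdrant client:
  #   url: http://localhost:6333
  #   api_key: ${QDRANT_API_KEY:}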

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: postgres
  schema_name: private_gpt

sagemaker:
  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479

openai:
  api_key: ${OPENAI_API_KEY:}
  model: gpt-4o-mini
  embedding_api_key: ${OPENAI_API_KEY:}
  temperature: 0.5

ollama:
  llm_model: llama3.1
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
  embedding_api_base: http://localhost:11434 # change if your embedding model runs on a different Ollama instance
  keep_alive: 5m
  request_timeout: 300.0
  autopull_models: true
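  # With autopull_models disabled, pull the models manually before starting
  # (assumes a local Ollama install):
  #   ollama pull llama3.1
  #   ollama pull nomic-embed-text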

azopenai:
  api_key: ${AZ_OPENAI_API_KEY:}
  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
  api_version: "2023-05-15"
  embedding_model: text-embedding-ada-002
  llm_model: gpt-35-turbo

gemini:
  api_key: ${GOOGLE_API_KEY:}
  model: models/gemini-pro
  embedding_model: models/embedding-001