| """Central configuration β Pydantic BaseSettings env-bound. |
| |
| Single source of truth: the ``settings = Settings()`` singleton. Every module |
| imports this. The ``.env`` file is automatically loaded (python-dotenv) if it |
| exists in the project root. |
| |
| Profiles: |
| * ``LLM_PROFILE=vllm`` β Qwen 2.5 on AMD MI300X via vLLM (OpenAI-compat). Production default. |
| * ``LLM_PROFILE=ollama`` β local Ollama (Qwen 2.5 7B Instruct). Dev / data-privacy. |
| * ``LLM_PROFILE=dummy`` β deterministic stub (CI / eval / load). |
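
Usage (a minimal sketch; the import path depends on where this module lives
in the package)::

    from config import settings

    if settings.is_dummy:
        ...  # skip real LLM calls, e.g. in CI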
| """ |
|
|
| from __future__ import annotations |
|
|
| from pathlib import Path |
| from typing import Literal |
|
|
| from pydantic import Field, computed_field |
| from pydantic_settings import BaseSettings, SettingsConfigDict |
|
|
| |
| PROJECT_ROOT = Path(__file__).resolve().parent |
|
|
|
|
| class Settings(BaseSettings): |
| """Full application runtime configuration. |
| |
| Every field reads from .env or env vars, with defaults. If .env does not |
| exist, the defaults run. |
| """ |
|
|
    model_config = SettingsConfigDict(
        env_file=PROJECT_ROOT / ".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    llm_profile: Literal["vllm", "ollama", "dummy"] = "vllm"
    """Default LLM profile. Runtime override:
    ``graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})``."""

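    # Sketch of how a consumer might turn the vLLM fields below into an
    # OpenAI-compatible client (assumes the ``openai`` package; this module
    # itself creates no clients):
    #
    #     from openai import OpenAI
    #     client = OpenAI(
    #         base_url=settings.vllm_base_url,
    #         api_key=settings.vllm_api_key or "EMPTY",
    #     )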
    vllm_base_url: str = "http://localhost:8000/v1"
    """vLLM endpoint URL. In production: ``http://<mi300x-public-ip>:8000/v1``."""

    vllm_model: str = "Qwen/Qwen2.5-14B-Instruct"
    """Model id served by vLLM. Alternatives: Qwen/Qwen2.5-32B-Instruct,
    Qwen/Qwen2.5-7B-Instruct."""

    vllm_api_key: str | None = None
    """Optional API key for vLLM. If unset, ``"EMPTY"`` is sent (vLLM's
    no-auth mode). In production, set a real key and start vLLM with
    ``--api-key <key>``."""

    vllm_temperature: float = 0.0
    vllm_max_tokens: int = 4096

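    # Local dev sketch for the ollama profile below (assumes Ollama is
    # installed):
    #
    #     ollama pull qwen2.5:7b-instruct
    #     export LLM_PROFILE=ollama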
    ollama_base_url: str = "http://localhost:11434"
    ollama_model: str = "qwen2.5:7b-instruct"
    ollama_temperature: float = 0.0

    embedding_model: str = "BAAI/bge-m3"
    """Default: BAAI/bge-m3 (2.27 GB, 1024-dim, multilingual EN/HU/DE/FR/...).
    Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5
    (133 MB, 384-dim, English-only)."""

    chroma_path: Path = Field(default=PROJECT_ROOT / "chroma_db")
    chroma_collection: str = "documents"
    checkpoint_db_path: Path = Field(default=PROJECT_ROOT / "data" / "checkpoints.sqlite")

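    # Rough chunk-count arithmetic, assuming a sliding-window chunker over
    # the fields below (an assumption about the ingestion code, which lives
    # elsewhere): the stride is chunk_max_chars - chunk_overlap_chars =
    # 14_500, so a 100_000-char document yields
    # ceil((100_000 - 500) / 14_500) = 7 chunks.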
    chunk_max_chars: int = 15_000
    chunk_overlap_chars: int = 500
    single_call_threshold: int = 30_000
    """If ``doc.full_text`` is shorter than this many chars, a single LLM call
    is enough (no chunking)."""

    chat_max_iterations: int = 10
    """Maximum agent <-> tools round-trips in the chat loop; guards against
    infinite loops."""

    validator_max_retries: int = 2
    """How many times the chat validator sends an answer back to the agent
    when source citations are missing."""

    dd_supervisor_max_iterations: int = 4
    """DD supervisor max iterations before the forced synthesizer fallback."""

    streamlit_port: int = 8501

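    # LangSmith tracing (fields below) turns on only when both values are
    # set, e.g. in ``.env``:
    #
    #     LANGCHAIN_TRACING_V2=true
    #     LANGCHAIN_API_KEY=<your LangSmith key>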
    langchain_tracing_v2: bool = False
    langchain_api_key: str | None = None
    langchain_project: str = "document-intelligence-amd"

    @computed_field
    @property
    def project_root(self) -> Path:
        return PROJECT_ROOT

    @computed_field
    @property
    def langsmith_enabled(self) -> bool:
        return self.langchain_tracing_v2 and bool(self.langchain_api_key)

    @computed_field
    @property
    def is_dummy(self) -> bool:
        return self.llm_profile == "dummy"


settings = Settings()