GOXY / app /config.py
lasagnakanada
Increase model inference timeout to 60s and add retry for ReadTimeout
16381cf
"""
Application configuration management using Pydantic Settings.
This module provides type-safe, validated configuration loaded from
environment variables and .env files.
"""
from functools import lru_cache
from typing import List, Optional
from pydantic import Field, PostgresDsn, RedisDsn, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """
    Application settings loaded from environment variables.

    Values come from the process environment and, when present, a UTF-8
    encoded ``.env`` file. Variable names are matched case-insensitively and
    unknown variables are ignored. Every field is type-checked and
    range-checked by Pydantic when the instance is created.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
        # Many fields below are prefixed with "model_", a namespace pydantic
        # protects by default and warns about. Only "settings_" needs to stay
        # protected here, so the warnings are silenced without renaming any
        # field (and therefore without renaming any environment variable).
        protected_namespaces=("settings_",),
    )

    # Application Settings
    app_name: str = Field(default="goxy-ml-service", description="Application name")
    app_version: str = Field(default="0.1.0", description="Application version")
    app_env: str = Field(
        default="development", description="Environment (development/staging/production)"
    )
    debug: bool = Field(default=False, description="Enable debug mode")
    log_level: str = Field(default="INFO", description="Logging level")

    # Server Configuration
    host: str = Field(default="0.0.0.0", description="Server host")
    port: int = Field(default=8000, ge=1, le=65535, description="Server port")
    workers: int = Field(default=4, ge=1, description="Number of worker processes")
    reload: bool = Field(default=False, description="Enable auto-reload for development")

    # Database Configuration
    database_url: str = Field(
        default="postgresql+asyncpg://goxy_user:goxy_password@localhost:5432/goxy_ml_db",
        description="Database connection URL",
    )
    db_pool_size: int = Field(default=10, ge=1, description="Database connection pool size")
    db_max_overflow: int = Field(default=20, ge=0, description="Maximum overflow connections")
    db_pool_timeout: int = Field(default=30, ge=1, description="Connection pool timeout in seconds")
    db_pool_recycle: int = Field(
        default=3600, ge=0, description="Connection recycle time in seconds"
    )
    db_echo: bool = Field(default=False, description="Echo SQL queries for debugging")

    # Redis Configuration (Optional)
    redis_url: Optional[str] = Field(
        default="redis://localhost:6379/0",
        description="Redis connection URL",
    )
    redis_max_connections: int = Field(default=10, ge=1, description="Redis max connections")
    redis_socket_timeout: int = Field(default=5, ge=1, description="Redis socket timeout")
    redis_socket_connect_timeout: int = Field(default=5, ge=1, description="Redis connect timeout")

    # Security Settings
    # NOTE(review): the default below is a publicly visible placeholder. It
    # satisfies min_length, so nothing in this class stops it from being used
    # in production -- confirm deployments always override SECRET_KEY.
    secret_key: str = Field(
        default="your-super-secret-key-change-this-in-production-min-32-chars",
        min_length=32,
        description="Secret key for JWT and encryption",
    )
    jwt_algorithm: str = Field(default="HS256", description="JWT signing algorithm")
    jwt_expiration: int = Field(default=3600, ge=60, description="JWT token expiration in seconds")
    jwt_refresh_expiration: int = Field(
        default=604800,
        ge=3600,
        description="JWT refresh token expiration in seconds",
    )
    api_key_header: str = Field(default="X-API-Key", description="API key header name")

    # CORS Settings
    # NOTE(review): "*.hf.space" is a glob-style pattern, not a literal origin.
    # Whether it matches anything depends on the middleware consuming this
    # list (exact-match origin lists would never match it) -- confirm.
    cors_origins: List[str] = Field(
        default=[
            "http://localhost:3000",
            "http://localhost:8000",
            "https://malasya-goxy.hf.space",
            "*.hf.space",
        ],
        description="Allowed CORS origins",
    )
    cors_credentials: bool = Field(default=True, description="Allow credentials in CORS")
    cors_methods: List[str] = Field(default=["*"], description="Allowed HTTP methods")
    cors_headers: List[str] = Field(default=["*"], description="Allowed headers")

    # ML Model Configuration
    llm_provider: str = Field(
        default="grok",
        description="LLM provider (grok|hf). grok=xAI API, hf=HuggingFace local models",
    )
    llm_model_name: str = Field(default="gpt2", description="LLM model name for generation")
    model_name: str = Field(
        default="gpt2", description="Hugging Face model name (deprecated, use llm_model_name)"
    )
    model_path: str = Field(default="./data/models/", description="Path to store models")
    model_device: str = Field(default="cpu", description="Device for model inference (cpu/cuda)")
    model_cache_dir: str = Field(default="./data/cache/", description="Model cache directory")
    model_max_length: int = Field(default=512, ge=10, description="Maximum generation length")
    model_min_length: int = Field(default=10, ge=1, description="Minimum generation length")
    model_temperature: float = Field(
        default=0.7, ge=0.0, le=2.0, description="Sampling temperature"
    )
    model_top_k: int = Field(default=50, ge=0, description="Top-k sampling parameter")
    model_top_p: float = Field(default=0.95, ge=0.0, le=1.0, description="Top-p (nucleus) sampling")
    model_num_return_sequences: int = Field(
        default=1, ge=1, description="Number of sequences to generate"
    )
    use_gpu: bool = Field(default=False, description="Use GPU for inference if available")

    # Grok (xAI) API Configuration
    grok_api_key: Optional[str] = Field(
        default=None,
        description="xAI Grok API key (set to enable Grok provider)",
    )
    grok_base_url: str = Field(
        default="https://api.x.ai/v1",
        description="xAI API base URL",
    )
    grok_model_name: str = Field(
        default="grok-4-fast",
        description="xAI Grok model name for chat completions (grok-4-fast, grok-4-fast-reasoning)",
    )

    # RAG / Long-Term Memory Configuration
    openai_api_key: Optional[str] = Field(
        default=None,
        description="OpenAI API key for embeddings",
    )
    pinecone_api_key: Optional[str] = Field(
        default=None,
        description="Pinecone API key",
    )
    pinecone_index_name: str = Field(
        default="goxy-memory",
        description="Pinecone index name",
    )
    embedding_model: str = Field(
        default="text-embedding-3-small",
        description="OpenAI embedding model",
    )

    # Moderation Settings
    enable_moderation: bool = Field(default=True, description="Enable content moderation")
    moderation_model_name: str = Field(
        default="original", description="Detoxify model variant (original, unbiased, multilingual)"
    )
    moderation_rejection_threshold: float = Field(
        default=0.7,
        ge=0.0,
        le=1.0,
        description="Toxicity score threshold for rejection",
    )
    moderation_strict_rejection_threshold: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Strict mode toxicity threshold",
    )
    toxicity_threshold: float = Field(
        default=0.7,
        ge=0.0,
        le=1.0,
        description="Toxicity score threshold for rejection (deprecated, use moderation_rejection_threshold)",
    )
    toxicity_model: str = Field(
        default="unitary/toxic-bert", description="Toxicity detection model (deprecated)"
    )
    moderation_enabled_categories: List[str] = Field(
        default=["toxicity", "severe_toxicity", "obscene", "threat", "insult"],
        description="Enabled moderation categories",
    )

    # Rate Limiting
    rate_limit_enabled: bool = Field(default=True, description="Enable rate limiting")
    rate_limit_requests: int = Field(default=100, ge=1, description="Max requests per window")
    rate_limit_window: int = Field(default=60, ge=1, description="Rate limit window in seconds")
    rate_limit_storage_url: str = Field(
        default="memory://",
        description="Rate limit storage URL (redis:// or memory://)",
    )

    # File Upload Settings
    upload_dir: str = Field(default="./data/uploads/", description="Upload directory path")
    max_upload_size: int = Field(default=10485760, ge=1024, description="Max upload size in bytes")
    allowed_upload_extensions: List[str] = Field(
        default=[".json", ".jsonl", ".csv", ".txt", ".pdf"],
        description="Allowed file extensions",
    )

    # Logging Configuration
    log_format: str = Field(default="json", description="Log format (json/console)")
    log_file: Optional[str] = Field(default="logs/app.log", description="Log file path")

    # Monitoring & Metrics
    enable_metrics: bool = Field(default=True, description="Enable Prometheus metrics")
    metrics_port: int = Field(default=9090, ge=1024, le=65535, description="Metrics server port")
    metrics_path: str = Field(default="/metrics", description="Metrics endpoint path")

    # Sentry (Optional)
    sentry_dsn: Optional[str] = Field(default=None, description="Sentry DSN for error tracking")
    sentry_environment: str = Field(default="development", description="Sentry environment name")
    sentry_traces_sample_rate: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        description="Sentry traces sample rate",
    )

    # Fine-tuning Configuration
    finetune_batch_size: int = Field(default=8, ge=1, description="Fine-tuning batch size")
    finetune_learning_rate: float = Field(
        default=0.00005,
        gt=0.0,
        description="Fine-tuning learning rate",
    )
    finetune_epochs: int = Field(default=3, ge=1, description="Fine-tuning epochs")
    finetune_warmup_steps: int = Field(default=100, ge=0, description="Warmup steps")
    finetune_save_steps: int = Field(default=500, ge=1, description="Save checkpoint every N steps")
    finetune_eval_steps: int = Field(default=500, ge=1, description="Evaluate every N steps")
    finetune_output_dir: str = Field(
        default="./data/models/finetuned/",
        description="Fine-tuned model output directory",
    )

    # Dataset Configuration
    dataset_min_quality_score: float = Field(
        default=3.0,
        ge=0.0,
        description="Minimum quality score for training data",
    )
    dataset_max_samples: int = Field(default=100000, ge=100, description="Maximum dataset samples")
    dataset_split_ratio: float = Field(
        default=0.8,
        ge=0.5,
        le=0.95,
        description="Train/test split ratio",
    )

    # Performance Settings
    request_timeout: int = Field(default=30, ge=1, description="Request timeout in seconds")
    model_inference_timeout: int = Field(default=60, ge=1, description="Model inference timeout")
    max_concurrent_requests: int = Field(default=100, ge=1, description="Max concurrent requests")

    # Background Tasks
    enable_background_tasks: bool = Field(default=True, description="Enable background tasks")
    background_task_interval: int = Field(
        default=300, ge=10, description="Background task interval"
    )

    # Feature Flags
    feature_streaming_responses: bool = Field(
        default=False, description="Enable streaming responses"
    )
    feature_batch_processing: bool = Field(default=False, description="Enable batch processing")
    feature_webhook_notifications: bool = Field(
        default=False, description="Enable webhook notifications"
    )
    feature_multi_model_support: bool = Field(
        default=False, description="Enable multi-model support"
    )

    @field_validator("app_env")
    @classmethod
    def validate_environment(cls, v: str) -> str:
        """
        Validate and normalise the deployment environment name.

        Args:
            v: Raw ``app_env`` value.

        Returns:
            The value lower-cased.

        Raises:
            ValueError: If the lower-cased value is not development, staging
                or production.
        """
        allowed = ["development", "staging", "production"]
        normalized = v.lower()
        if normalized not in allowed:
            raise ValueError(f"app_env must be one of: {', '.join(allowed)}")
        return normalized

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v: str) -> str:
        """
        Validate and normalise the logging level name.

        Args:
            v: Raw ``log_level`` value.

        Returns:
            The value upper-cased.

        Raises:
            ValueError: If the upper-cased value is not one of DEBUG, INFO,
                WARNING, ERROR or CRITICAL.
        """
        allowed = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        normalized = v.upper()
        if normalized not in allowed:
            raise ValueError(f"log_level must be one of: {', '.join(allowed)}")
        return normalized

    @field_validator("model_device")
    @classmethod
    def validate_device(cls, v: str) -> str:
        """
        Validate and normalise the inference device name.

        Args:
            v: Raw ``model_device`` value.

        Returns:
            The value lower-cased.

        Raises:
            ValueError: If the lower-cased value is neither ``cpu`` nor ``cuda``.
        """
        normalized = v.lower()
        if normalized not in ["cpu", "cuda"]:
            raise ValueError("model_device must be 'cpu' or 'cuda'")
        return normalized

    @property
    def is_production(self) -> bool:
        """Check if running in production environment."""
        return self.app_env == "production"

    @property
    def is_development(self) -> bool:
        """Check if running in development environment."""
        return self.app_env == "development"

    def get_database_url_sync(self) -> str:
        """
        Get synchronous database URL (for Alembic migrations).

        Removes the ``+asyncpg`` or ``+psycopg`` driver suffix from the URL
        scheme. Only the scheme is inspected, and only an exact driver match
        is removed, so other drivers (e.g. ``+psycopg2``) and any credentials,
        host or database name containing those substrings are left intact.

        Returns:
            The database URL with the driver suffix removed from its scheme,
            or ``database_url`` unchanged when there is nothing to remove.
        """
        scheme, separator, remainder = self.database_url.partition("://")
        if not separator:
            # Not in "scheme://rest" form; nothing can be safely rewritten.
            return self.database_url
        dialect, _, driver = scheme.partition("+")
        if driver in ("asyncpg", "psycopg"):
            scheme = dialect
        return f"{scheme}{separator}{remainder}"
@lru_cache()
def get_settings() -> Settings:
    """
    Return the shared application settings object.

    The first call constructs a ``Settings`` instance; because the function
    takes no arguments and is wrapped in ``lru_cache``, every later call
    returns that same cached instance instead of building a new one.

    Returns:
        Settings: Application settings instance
    """
    instance = Settings()
    return instance
# Global settings instance (convenience export).
# Created when this module is imported by calling get_settings(), so this
# name and any later get_settings() call share the same cached object.
settings: Settings = get_settings()