|
|
|
"""
Centralized Configuration Management for Unified AI Services

Manages configuration for the NER, OCR, and RAG services.
"""
|
|
|
|
import os
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any, List
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
# Load environment variables before any configuration object below reads
# them. A .env file sitting next to this module is used when it exists;
# otherwise load_dotenv() is called without an explicit path.
env_path = Path(__file__).with_name('.env')
if not env_path.exists():
    load_dotenv()
else:
    load_dotenv(dotenv_path=env_path)
|
|
|
|
|
|
# Resolve the root log level from the LOG_LEVEL environment variable
# (case-insensitive, surrounding whitespace ignored). A value that does not
# name a numeric logging level falls back to INFO instead of raising at
# import time, so a typo in the environment cannot prevent start-up.
_requested_level = os.getenv("LOG_LEVEL", "INFO").strip().upper()
_resolved_level = getattr(logging, _requested_level, None)
_level_is_valid = isinstance(_resolved_level, int)
if not _level_is_valid:
    _resolved_level = logging.INFO

logging.basicConfig(
    level=_resolved_level,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

if not _level_is_valid:
    # Reported after basicConfig so the message goes through the configured handler.
    logger.warning("Unknown LOG_LEVEL %r; defaulting to INFO", _requested_level)
|
|
|
|
class BaseConfig:
    """Settings shared by all services, read from environment variables.

    Every attribute is populated in ``__init__`` from the process
    environment, using the default shown next to it when the variable is
    not set. The ``validate_*`` methods report whether a group of related
    settings has been filled in.
    """

    def __init__(self):
        # os.getenv is a thin wrapper around os.environ.get; bind it once.
        read = os.environ.get

        # Server settings
        self.HOST = read("HOST", "0.0.0.0")
        debug_flag = read("DEBUG", "False")
        self.DEBUG = debug_flag.lower() == "true"  # only "true" (any case) enables it

        # PostgreSQL connection
        self.POSTGRES_HOST = read("POSTGRES_HOST", "")
        self.POSTGRES_PORT = int(read("POSTGRES_PORT", "5432"))
        self.POSTGRES_USER = read("POSTGRES_USER", "")
        self.POSTGRES_PASSWORD = read("POSTGRES_PASSWORD", "")
        self.POSTGRES_DATABASE = read("POSTGRES_DATABASE", "postgres")

        # Azure OpenAI
        self.AZURE_OPENAI_ENDPOINT = read("AZURE_OPENAI_ENDPOINT", "")
        self.AZURE_OPENAI_API_KEY = read("AZURE_OPENAI_API_KEY", "")
        self.EMBEDDING_MODEL = read("EMBEDDING_MODEL", "text-embedding-3-large")
        self.AZURE_OPENAI_DEPLOYMENT_NAME = read(
            "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-3-large"
        )

        # Azure Blob Storage
        self.AZURE_STORAGE_ACCOUNT_URL = read("AZURE_STORAGE_ACCOUNT_URL", "")
        self.AZURE_BLOB_SAS_TOKEN = read("AZURE_BLOB_SAS_TOKEN", "")
        self.BLOB_CONTAINER = read("BLOB_CONTAINER", "historylog")

        # Request limits: the MAX_FILE_SIZE variable is multiplied by
        # 1024 * 1024 before being stored (i.e. given in MiB, kept in bytes).
        size_in_mib = int(read("MAX_FILE_SIZE", "50"))
        self.MAX_FILE_SIZE = size_in_mib * 1024 * 1024
        self.REQUEST_TIMEOUT = int(read("REQUEST_TIMEOUT", "300"))

        # CORS
        self.ALLOWED_ORIGINS = read("ALLOWED_ORIGINS", "*")

    def validate_azure_openai(self) -> bool:
        """Return True when the Azure OpenAI endpoint and key are both set
        and neither still holds its ``YOUR_...`` placeholder value."""
        endpoint = self.AZURE_OPENAI_ENDPOINT
        api_key = self.AZURE_OPENAI_API_KEY
        if not (endpoint and api_key):
            return False
        return (
            endpoint != "YOUR_AZURE_OPENAI_ENDPOINT"
            and api_key != "YOUR_AZURE_OPENAI_KEY"
        )

    def validate_postgres(self) -> bool:
        """Return True when host, user, password and database are all non-empty."""
        required = (
            self.POSTGRES_HOST,
            self.POSTGRES_USER,
            self.POSTGRES_PASSWORD,
            self.POSTGRES_DATABASE,
        )
        return all(required)

    def validate_azure_storage(self) -> bool:
        """Return True when both the storage account URL and SAS token are non-empty."""
        return bool(self.AZURE_STORAGE_ACCOUNT_URL) and bool(self.AZURE_BLOB_SAS_TOKEN)
|
|
|
|
class NERConfig(BaseConfig):
    """Configuration for the NER service.

    Extends the shared settings with the service port, DeepSeek
    credentials, the OCR service URL, supported file extensions and the
    entity / relationship label vocabularies.
    """

    def __init__(self):
        super().__init__()
        self.PORT = int(os.getenv("NER_PORT", "8500"))

        # DeepSeek model access
        self.DEEPSEEK_ENDPOINT = os.getenv("DEEPSEEK_ENDPOINT", "")
        self.DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
        self.DEEPSEEK_MODEL = os.getenv("DEEPSEEK_MODEL", "DeepSeek-R1-0528")

        # OCR service this service talks to
        self.OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:8400")

        # Input limits and accepted file extensions
        # NOTE(review): unit of MAX_TEXT_LENGTH is presumably characters - confirm against callers.
        self.MAX_TEXT_LENGTH = 100000
        self.SUPPORTED_TEXT_FORMATS = {'.txt', '.doc', '.docx', '.rtf'}
        self.SUPPORTED_OCR_FORMATS = {'.pdf', '.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif'}

        # Entity labels
        self.ENTITY_TYPES = [
            "PERSON", "ORGANIZATION", "LOCATION", "DATE", "TIME", "MONEY", "PRODUCT", "EVENT",
            "VEHICLE", "SUSPICIOUS_OBJECT", "ILLEGAL_ACTIVITY", "EVIDENCE", "ILLEGAL_ITEM",
            "WEAPON", "DRUG", "CHEMICAL", "DOCUMENT", "PHONE_NUMBER", "ADDRESS", "EMAIL"
        ]

        # Relationship labels, in English and Thai. Each label appears once.
        self.RELATIONSHIP_TYPES = [
            # General relationships (English)
            "works_for", "founded", "located_in", "part_of", "associated_with", "owns", "manages",
            "leads", "reports_to", "collaborates_with", "partners_with", "supplies_to", "acquires",
            "invests_in", "headquartered_in", "operates_in", "born_in", "lives_in", "studied_at",
            "graduated_from", "worked_at", "visited", "attended", "participated_in", "sponsored",
            "developed", "created", "invented", "discovered", "published", "authored", "edited",

            # General relationships (Thai)
            "ทำงานที่", "ก่อตั้ง", "ตั้งอยู่ที่", "เป็นส่วนหนึ่งของ", "เกี่ยวข้องกับ", "เป็นเจ้าของ", "จัดการ",
            "นำโดย", "รายงานต่อ", "ร่วมงานกับ", "เป็นพันธมิตรกับ", "จัดหาให้", "ซื้อกิจการ", "ลงทุนใน",
            "สำนักงานใหญ่ที่", "ดำเนินการใน", "เกิดที่", "อาศัยอยู่ที่", "ศึกษาที่", "จบการศึกษาจาก",

            # Investigation and court relationships (English)
            "arrested_by", "investigated_by", "confiscated_from", "used_in", "evidence_of", "witness_of",
            "victim_of", "suspect_in", "charged_with", "convicted_of", "sentenced_by", "defended_by",
            "prosecuted_by", "testified_against", "alibi_for", "found_at", "seized_from", "linked_to",

            # Investigation and court relationships (Thai). The label that
            # appeared a second time near the end of this group was an exact
            # duplicate of the third entry below and has been removed.
            "จับกุมโดย", "สอบสวนโดย", "ยึดจาก", "ใช้ในการ", "หลักฐานของ", "พยานใน", "เหยื่อของ",
            "ผู้ต้องสงสัยใน", "ถูกตั้งข้อหา", "ถูกตัดสิน", "ถูกพิพากษาโดย", "ต่อสู้คดีโดย", "ฟ้องร้องโดย",
            "ให้การต่อต้าน", "เป็นข้อแก้ตัวสำหรับ", "พบที่", "เชื่อมโยงกับ",

            # Criminal-activity relationships (English)
            "possess_illegal", "transport_illegal", "sell_illegal", "buy_illegal", "hide_evidence",
            "plan_crime", "commit_crime", "flee_from", "escape_from", "hide_at", "meet_with",
            "communicate_with", "threaten", "blackmail", "bribe", "corrupt", "money_launder",

            # Criminal-activity relationships (Thai)
            "ครอบครองของผิดกฎหมาย", "ขนส่งของผิดกฎหมาย", "ขายของผิดกฎหมาย", "ซื้อของผิดกฎหมาย",
            "ซ่อนหลักฐาน", "วางแผนอาชญากรรม", "กระทำอาชญากรรม", "หลบหนีจาก", "แอบซ่อนที่",
            "พบปะกับ", "ติดต่อกับ", "ข่มขู่", "แบล็คเมล์", "ให้สินบน", "ทุจริต", "ฟอกเงิน"
        ]

    def validate_deepseek(self) -> bool:
        """Return True when the DeepSeek endpoint and key are both set and
        neither still holds its ``YOUR_...`` placeholder value."""
        return bool(
            self.DEEPSEEK_ENDPOINT and
            self.DEEPSEEK_API_KEY and
            self.DEEPSEEK_ENDPOINT != "YOUR_DEEPSEEK_ENDPOINT" and
            self.DEEPSEEK_API_KEY != "YOUR_DEEPSEEK_API_KEY"
        )
|
|
|
|
class OCRConfig(BaseConfig):
    """Configuration for the OCR service.

    Adds the service port, Azure Document Intelligence credentials and
    two request-related settings on top of the shared base settings.
    """

    def __init__(self):
        super().__init__()
        read = os.environ.get  # the lookup os.getenv delegates to

        self.PORT = int(read("OCR_PORT", "8400"))

        # Azure Document Intelligence credentials
        self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT = read("AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT", "")
        self.AZURE_DOCUMENT_INTELLIGENCE_KEY = read("AZURE_DOCUMENT_INTELLIGENCE_KEY", "")

        # Processing settings
        self.MAX_IMAGES_PER_PAGE = int(read("MAX_IMAGES_PER_PAGE", "10"))
        default_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )
        self.USER_AGENT = read("USER_AGENT", default_user_agent)

    def validate_azure_document_intelligence(self) -> bool:
        """Return True when the Document Intelligence endpoint and key are
        both set and neither still holds its ``YOUR_...`` placeholder value."""
        endpoint = self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT
        key = self.AZURE_DOCUMENT_INTELLIGENCE_KEY
        if not (endpoint and key):
            return False
        return (
            endpoint != "YOUR_FORM_RECOGNIZER_ENDPOINT"
            and key != "YOUR_FORM_RECOGNIZER_KEY"
        )
|
|
|
|
class RAGConfig(BaseConfig):
    """Configuration for the RAG service.

    Adds the service port, OCR service URL, ``PG_*`` database settings,
    chunking parameters and Azure OpenAI deployment details on top of the
    shared base settings.
    """

    def __init__(self):
        super().__init__()
        read = os.environ.get  # the lookup os.getenv delegates to

        self.PORT = int(read("RAG_PORT", "8401"))

        # OCR service this service talks to
        self.OCR_SERVICE_URL = read("OCR_SERVICE_URL", "http://localhost:8400")

        # Database settings: host, port, user and password mirror the shared
        # POSTGRES_* values; the database name and SSL mode have their own
        # environment variables.
        self.PG_HOST = self.POSTGRES_HOST
        self.PG_PORT = self.POSTGRES_PORT
        self.PG_DATABASE = read("PG_DATABASE", "vectorsearch")
        self.PG_USER = self.POSTGRES_USER
        self.PG_PASSWORD = self.POSTGRES_PASSWORD
        self.PG_SSL_MODE = read("PG_SSL_MODE", "require")

        # Chunking parameters
        chunk_size = read("CHUNK_SIZE", "1000")
        chunk_overlap = read("CHUNK_OVERLAP", "200")
        min_chunk_size = read("MIN_CHUNK_SIZE", "50")
        self.CHUNK_SIZE = int(chunk_size)
        self.CHUNK_OVERLAP = int(chunk_overlap)
        self.MIN_CHUNK_SIZE = int(min_chunk_size)

        # Azure OpenAI deployment used by this service
        self.AZURE_OPENAI_DEPLOYMENT = read("AZURE_OPENAI_DEPLOYMENT", "text-embedding-3-small")
        self.AZURE_OPENAI_API_VERSION = read("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
|
|
|
|
class UnifiedConfig:
    """Unified configuration for all services.

    Holds one configuration object per service (``ner``, ``ocr``, ``rag``)
    plus the settings of the main application: its host/port, the local
    URLs of the three services, health-check constants and service weights.
    """

    def __init__(self):
        self.ner = NERConfig()
        self.ocr = OCRConfig()
        self.rag = RAGConfig()

        # Main application settings
        self.MAIN_PORT = int(os.getenv("MAIN_PORT", "8000"))
        self.MAIN_HOST = os.getenv("MAIN_HOST", "0.0.0.0")

        # Service URLs are always built from localhost and each service's port.
        self.NER_SERVICE_URL = f"http://localhost:{self.ner.PORT}"
        self.OCR_SERVICE_URL = f"http://localhost:{self.ocr.PORT}"
        self.RAG_SERVICE_URL = f"http://localhost:{self.rag.PORT}"

        # Health-check settings
        # NOTE(review): timeout/interval are presumably seconds - confirm against the consumer.
        self.HEALTH_CHECK_TIMEOUT = 30
        self.HEALTH_CHECK_RETRIES = 3
        self.HEALTH_CHECK_INTERVAL = 5

        # Per-service weights (all equal)
        self.SERVICE_WEIGHTS = {
            "ner": 1.0,
            "ocr": 1.0,
            "rag": 1.0
        }

    def validate_all(self) -> Dict[str, Dict[str, bool]]:
        """Run every service's ``validate_*`` checks.

        Returns:
            A mapping ``service name -> component name -> bool`` with the
            outcome of each check.
        """
        return {
            "ner": {
                "deepseek": self.ner.validate_deepseek(),
                "azure_openai": self.ner.validate_azure_openai(),
                "postgres": self.ner.validate_postgres(),
                "azure_storage": self.ner.validate_azure_storage()
            },
            "ocr": {
                "azure_document_intelligence": self.ocr.validate_azure_document_intelligence()
            },
            "rag": {
                "azure_openai": self.rag.validate_azure_openai(),
                "postgres": self.rag.validate_postgres()
            }
        }

    def get_service_config(self, service_name: str) -> Optional["BaseConfig"]:
        """Look up the configuration object of one service.

        Args:
            service_name: ``"ner"``, ``"ocr"`` or ``"rag"`` (case-insensitive).

        Returns:
            The matching configuration object, or ``None`` when the name
            is not one of the three known services.
        """
        service_configs = {
            "ner": self.ner,
            "ocr": self.ocr,
            "rag": self.rag
        }
        return service_configs.get(service_name.lower())

    def get_database_config(self) -> Dict[str, str]:
        """Return PostgreSQL connection settings as a dict of strings.

        Host, port, user, password and database come from the NER
        configuration; ``ssl_mode`` comes from the RAG configuration's
        ``PG_SSL_MODE`` and defaults to ``'require'`` when that attribute
        is absent.
        """
        return {
            "host": self.ner.POSTGRES_HOST,
            "port": str(self.ner.POSTGRES_PORT),
            "user": self.ner.POSTGRES_USER,
            "password": self.ner.POSTGRES_PASSWORD,
            "database": self.ner.POSTGRES_DATABASE,
            "ssl_mode": getattr(self.rag, 'PG_SSL_MODE', 'require')
        }

    def get_azure_openai_config(self) -> Dict[str, str]:
        """Return the Azure OpenAI settings, taken from the NER configuration."""
        return {
            "endpoint": self.ner.AZURE_OPENAI_ENDPOINT,
            "api_key": self.ner.AZURE_OPENAI_API_KEY,
            "embedding_model": self.ner.EMBEDDING_MODEL,
            "deployment_name": self.ner.AZURE_OPENAI_DEPLOYMENT_NAME
        }

    @staticmethod
    def _status_mark(is_valid: bool) -> str:
        """Return the check-mark or cross used in the printed summary."""
        return '✅' if is_valid else '❌'

    def print_configuration_summary(self):
        """Print a human-readable summary of all configurations to stdout.

        The summary lists each service's validation status and key
        settings, followed by either the list of components that failed
        validation or a confirmation that everything is valid.
        """
        mark = self._status_mark
        validation_results = self.validate_all()
        ner_checks = validation_results['ner']
        ocr_checks = validation_results['ocr']
        rag_checks = validation_results['rag']

        print("🔧 Configuration Summary")
        print("=" * 50)

        print(f"📝 NER Service (Port {self.ner.PORT}):")
        print(f" DeepSeek: {mark(ner_checks['deepseek'])}")
        print(f" Azure OpenAI: {mark(ner_checks['azure_openai'])}")
        print(f" PostgreSQL: {mark(ner_checks['postgres'])}")
        print(f" Azure Storage: {mark(ner_checks['azure_storage'])}")
        print(f" OCR Service URL: {self.ner.OCR_SERVICE_URL}")

        print(f"\n🔍 OCR Service (Port {self.ocr.PORT}):")
        print(f" Azure Document Intelligence: {mark(ocr_checks['azure_document_intelligence'])}")
        print(f" Max File Size: {self.ocr.MAX_FILE_SIZE / (1024*1024):.0f} MB")

        print(f"\n🧠 RAG Service (Port {self.rag.PORT}):")
        print(f" Azure OpenAI: {mark(rag_checks['azure_openai'])}")
        print(f" PostgreSQL: {mark(rag_checks['postgres'])}")
        print(f" OCR Service URL: {self.rag.OCR_SERVICE_URL}")
        print(f" Chunk Size: {self.rag.CHUNK_SIZE}")

        print(f"\n🌐 Main App (Port {self.MAIN_PORT}):")
        print(f" NER Service: {self.NER_SERVICE_URL}")
        print(f" OCR Service: {self.OCR_SERVICE_URL}")
        print(f" RAG Service: {self.RAG_SERVICE_URL}")

        print("\n🗄️ Database Configuration:")
        print(f" Host: {self.ner.POSTGRES_HOST}")
        print(f" Port: {self.ner.POSTGRES_PORT}")
        print(f" User: {self.ner.POSTGRES_USER}")
        print(f" NER Database: {self.ner.POSTGRES_DATABASE}")
        print(f" RAG Database: {self.rag.PG_DATABASE}")

        # Collect every (service, component) pair whose check failed.
        failures = [
            (service, component)
            for service, checks in validation_results.items()
            for component, is_valid in checks.items()
            if not is_valid
        ]

        if failures:
            print("\n⚠️ CONFIGURATION ISSUES DETECTED:")
            for service, component in failures:
                print(f" ❌ {service.upper()}: {component} not configured")
        else:
            print("\n✅ All configurations are valid!")
|
|
|
|
|
|
# Module-level configuration instance, created when this module is imported.
config = UnifiedConfig()
|
|
|
|
def get_config() -> UnifiedConfig:
    """Return the module-level ``config`` object.

    No new instance is created; every call hands back the same object.
    """
    return config
|
|
|
|
def validate_environment() -> bool:
    """Report whether every critical component is configured.

    Runs ``config.validate_all()`` and checks the five components treated
    as critical here. The NER ``deepseek`` and ``azure_storage`` checks are
    not part of this set.

    Returns:
        True only when all critical checks passed.
    """
    results = config.validate_all()

    # (service, component) pairs that must all be valid.
    critical_keys = (
        ('ner', 'azure_openai'),
        ('ner', 'postgres'),
        ('ocr', 'azure_document_intelligence'),
        ('rag', 'azure_openai'),
        ('rag', 'postgres'),
    )

    # Build the list first so every lookup happens before all() runs.
    outcomes = [results[service][component] for service, component in critical_keys]
    return all(outcomes)
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: print the configuration summary, then report
    # whether the critical components validated.
    print("🧪 Testing Configuration Loading")
    print("=" * 40)

    try:
        config.print_configuration_summary()

        if validate_environment():
            print("\n🎉 Environment validation passed!")
            print("All critical services are properly configured.")
        else:
            print("\n❌ Environment validation failed!")
            print("Some critical services are not properly configured.")
            print("Please check your .env file and update missing values.")

    except Exception as e:
        # Top-level boundary of the script: report the failure to the user
        # and log it with the traceback so the cause can be located.
        print(f"\n❌ Configuration loading failed: {e}")
        logger.exception("Configuration error: %s", e)