# Gradio_chat / settings.py
# princeta3011's picture
# Upload 11 files
# 69a077e verified
import os
from typing import Dict, List, Optional

from dotenv import load_dotenv
from pydantic import BaseModel
# Load variables from a .env file into the process environment. This must run
# before the os.getenv() lookups that follow in this module.
load_dotenv()
class DatabaseConfig(BaseModel):
    """Database connection settings sourced from environment variables.

    Each field defaults to the value of the same-named environment variable,
    read once when this class body is executed. ``os.getenv`` returns ``None``
    for a variable that is not set, and pydantic does not validate default
    values unless explicitly asked to, so a missing variable ends up stored
    as ``None``. The fields are therefore declared ``Optional[str]`` so the
    annotations match the values that can actually occur.
    """

    # Field names suggest MongoDB connection details -- confirm with callers.
    mongo_uri: Optional[str] = os.getenv("mongo_uri")
    mongo_db: Optional[str] = os.getenv("mongo_db")

    # Field names suggest Neo4j connection details -- confirm with callers.
    neo4j_uri: Optional[str] = os.getenv("neo4j_uri")
    neo4j_user: Optional[str] = os.getenv("neo4j_user")
    neo4j_password: Optional[str] = os.getenv("neo4j_password")
class ScrapingConfig(BaseModel):
    """Default knobs for the page-scraping step."""

    # NOTE(review): unit is not stated here; 30000 looks like milliseconds --
    # confirm against the code that consumes it.
    timeout: int = 30_000

    # Selector name to wait for; "body" is the default.
    wait_for_selector: str = "body"

    headless: bool = True

    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

    max_retries: int = 3

    # NOTE(review): presumably seconds between requests -- confirm with caller.
    delay_between_requests: float = 1.0
class ExtractionConfig(BaseModel):
    """Default rules for the content-extraction step."""

    # Selectors listed as content sources, in their original order.
    content_selectors: List[str] = [
        "article",
        "main",
        ".content",
        "#content",
        ".post",
        ".article-body",
        "p",
        "h1",
        "h2",
        "h3",
    ]

    # Selectors listed as elements to ignore, in their original order.
    ignore_selectors: List[str] = [
        "script",
        "style",
        "nav",
        "footer",
        "header",
        ".advertisement",
        ".ads",
        ".sidebar",
    ]

    # NOTE(review): presumably a character-count threshold -- confirm with caller.
    min_text_length: int = 50

    extract_images: bool = True
    extract_links: bool = True
class Settings:
    """Bundle of the database, scraping and extraction configuration."""

    def __init__(self):
        """Build every configuration section from its declared defaults."""
        self.database = DatabaseConfig()
        self.scraping = ScrapingConfig()
        self.extraction = ExtractionConfig()

    def update_from_env(self):
        """Copy non-empty environment values onto the database section.

        For each database field, the same-named environment variable is
        looked up; when it holds a non-empty value, that value replaces the
        current attribute on ``self.database``. Unset or empty variables
        leave the existing attribute untouched.
        """
        database_fields = (
            "mongo_uri",
            "mongo_db",
            "neo4j_uri",
            "neo4j_user",
            "neo4j_password",
        )
        for field_name in database_fields:
            env_value = os.getenv(field_name)
            # Falsy covers both "unset" (None) and "set but empty" ("").
            if env_value:
                setattr(self.database, field_name, env_value)
# Module-level settings instance, constructed when this module is imported.
settings = Settings()
# Apply any non-empty database environment variables on top of the defaults.
settings.update_from_env()