"""
FastAPI backend for crossword puzzle generator with vector similarity search.
"""

import os
import logging
import time
from datetime import datetime
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
import uvicorn
from dotenv import load_dotenv

from src.routes.api import router as api_router
from src.services.thematic_word_service import ThematicWordService

# Load environment variables
load_dotenv()

# Set up logging with filename and line numbers
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(filename)s:%(lineno)d - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S'
)
logger = logging.getLogger(__name__)

# Global thematic service instance; set by ``lifespan`` during startup.
thematic_service = None

# Keys of the per-cache entries in ``ThematicWordService.get_cache_status()``
# that the startup diagnostics report on.
_CACHE_TYPES = ("vocabulary_cache", "frequency_cache", "embeddings_cache")


def _log_cache_preflight(service):
    """Log where the cache lives and which cache files already exist."""
    cache_status = service.get_cache_status()
    cache_directory = cache_status['cache_directory']
    logger.info(f"📁 Cache directory: {cache_directory}")
    logger.info(f"🔍 Cache directory exists: {os.path.exists(cache_directory)}")
    logger.info(f"✏️ Cache directory writable: {os.access(cache_directory, os.W_OK)}")

    cache_complete = cache_status['complete']
    logger.info(f"📦 Existing cache complete: {cache_complete}")
    if not cache_complete:
        for cache_type in _CACHE_TYPES:
            cache_info = cache_status[cache_type]
            logger.info(f" {cache_type}: exists={cache_info['exists']}, path={cache_info['path']}")


def _log_cache_result(service):
    """Log which cache files are present after initialization."""
    cache_status_after = service.get_cache_status()
    logger.info(f"✅ Cache status after initialization: complete={cache_status_after['complete']}")
    for cache_type in _CACHE_TYPES:
        cache_info = cache_status_after[cache_type]
        if cache_info['exists']:
            logger.info(f" ✅ {cache_type}: {cache_info.get('size_mb', 0):.1f}MB")
        else:
            logger.warning(f" ❌ {cache_type}: NOT CREATED")


def _init_wordnet_generator(service):
    """Best-effort eager setup of the WordNet clue generator.

    Any failure (including a missing module) is logged and swallowed on
    purpose: startup must not fail because of the clue generator.
    """
    logger.info("🔧 Initializing WordNet clue generator...")
    try:
        wordnet_start = time.time()
        # Imported lazily so a missing/broken module only disables this step.
        from src.services.wordnet_clue_generator import WordNetClueGenerator

        wordnet_generator = WordNetClueGenerator(cache_dir=str(service.cache_dir))
        wordnet_generator.initialize()

        # Store in thematic service for later use
        service._wordnet_generator = wordnet_generator

        wordnet_time = time.time() - wordnet_start
        logger.info(f"✅ WordNet clue generator initialized in {wordnet_time:.2f}s")
    except Exception as e:
        logger.warning(f"⚠️ Failed to initialize WordNet clue generator during startup: {e}")
        logger.info("📝 WordNet clue generator will be initialized on first use")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize and cleanup application resources.

    Startup creates the global ``ThematicWordService``, awaits its
    ``initialize_async()``, logs cache diagnostics before and after, and then
    attempts to set up the WordNet clue generator. The service is published
    on ``app.state.thematic_service`` before the application starts serving.

    Raises:
        ImportError, PermissionError, Exception: any failure while creating
            or initializing the thematic service is logged and re-raised so
            the application fails fast instead of running without it.
    """
    global thematic_service

    # Startup
    logger.info("🚀 Initializing Python backend with thematic word service...")

    try:
        service_start = time.time()
        logger.info("🔧 Creating ThematicWordService instance...")
        thematic_service = ThematicWordService()

        # Log cache configuration for debugging
        _log_cache_preflight(thematic_service)

        # Force eager initialization to create cache files
        logger.info("⚡ Starting thematic service initialization (creating cache files)...")
        await thematic_service.initialize_async()

        # Verify cache files were created
        _log_cache_result(thematic_service)

        init_time = time.time() - service_start
        logger.info(f"🎉 Thematic service initialized in {init_time:.2f}s")

        # Initialize WordNet clue generator during startup (best effort)
        _init_wordnet_generator(thematic_service)

    except ImportError as e:
        logger.error(f"❌ Missing dependencies for thematic service: {e}")
        logger.error("💡 Install missing packages: pip install wordfreq sentence-transformers torch scikit-learn")
        raise  # Fail fast on missing dependencies
    except PermissionError as e:
        # ``thematic_service`` is a module global, so it never shows up in
        # ``locals()``; read the attribute defensively instead (the service
        # is still None if its constructor is what raised).
        cache_dir = getattr(thematic_service, "cache_dir", "unknown")
        logger.error(f"❌ Permission error with cache directory: {e}")
        logger.error(f"💡 Check cache directory permissions: {cache_dir}")
        raise  # Fail fast on permission issues
    except Exception as e:
        logger.error(f"❌ Failed to initialize thematic service: {e}")
        logger.error(f"🔍 Error type: {type(e).__name__}")
        import traceback
        logger.error(f"📋 Full traceback: {traceback.format_exc()}")
        raise  # Fail fast instead of continuing without service

    # Make thematic service available to routes
    app.state.thematic_service = thematic_service

    yield

    # Shutdown
    logger.info("🛑 Shutting down Python backend...")
    # Thematic service doesn't need cleanup, but we can add it if needed in the future


# Create FastAPI app
app = FastAPI(
    title="Crossword Puzzle Generator API",
    description="Python backend with AI-powered thematic word generation",
    version="2.0.0",
    lifespan=lifespan
)

# CORS configuration
# NOTE(review): production combines a wildcard origin with
# allow_credentials=True; confirm this is intended for the deployment.
if os.getenv("NODE_ENV") == "production":
    # Production: HuggingFace Spaces
    cors_origins = ["*"]
else:
    # Development: allow dev servers
    cors_origins = [
        "http://localhost:5173",  # Vite dev server
        "http://localhost:3000",  # Alternative dev server
        "http://localhost:7860",  # Local production test
    ]

app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include API routes
app.include_router(api_router, prefix="/api")


# Registered BEFORE the SPA catch-all below: routes are matched in
# registration order, so a later "/health" would be shadowed by
# "/{full_path:path}" and answer with index.html.
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns:
        A dict with ``status``, ``backend`` and ``vector_search``; the last
        is True once the thematic service has been created and (when it
        exposes ``is_initialized``) reports itself initialized.
    """
    service = thematic_service
    # Startup fails fast if initialization raises, so a live service is
    # treated as initialized when it has no ``is_initialized`` attribute.
    initialized = service is not None and bool(getattr(service, "is_initialized", True))
    return {
        "status": "healthy",
        "backend": "python",
        "vector_search": initialized
    }


# Serve static files (frontend)
static_path = Path(__file__).parent / "public"
if static_path.exists():
    assets_path = static_path / "assets"
    if assets_path.is_dir():
        app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
    else:
        # Mounting a missing directory raises at import time; skip instead.
        logger.warning(f"Static assets directory not found: {assets_path}")

    @app.get("/")
    async def serve_frontend():
        """Serve the React frontend."""
        index_path = static_path / "index.html"
        if index_path.exists():
            return FileResponse(index_path)
        raise HTTPException(status_code=404, detail="Frontend not found")

    @app.get("/{full_path:path}")
    async def serve_spa_routes(full_path: str):
        """Serve React SPA routes."""
        # For any non-API route, serve the React app
        if not full_path.startswith("api/"):
            index_path = static_path / "index.html"
            if index_path.exists():
                return FileResponse(index_path)
        raise HTTPException(status_code=404, detail="Not found")


if __name__ == "__main__":
    is_production = os.getenv("NODE_ENV") == "production"
    port = int(os.getenv("PORT", 7860))
    host = "0.0.0.0" if is_production else "127.0.0.1"

    logger.info(f"🐍 Starting Python backend on {host}:{port}")

    uvicorn.run(
        "app:app",
        host=host,
        port=port,
        reload=not is_production
    )