# main.py - Final Production-Ready Backend
import io
import logging
import os

from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse

# Import our engine blueprints. Each engine is optional: a failed import
# leaves the corresponding name as None and the related endpoints answer 503.
try:
    from asr_engine import ASREngine
except ImportError:
    ASREngine = None

try:
    # Set environment variables to prevent numba caching issues.
    # These must be set before tts_engine is imported.
    os.environ['NUMBA_DISABLE_JIT'] = '1'
    os.environ['NUMBA_CACHE_DIR'] = '/tmp/numba_cache'
    from tts_engine import TTSEngine
    TTS_AVAILABLE = True
except ImportError as e:
    print(f"⚠️ TTS engine not available: {e}")
    TTSEngine = None
    TTS_AVAILABLE = False

try:
    from translation_engine import TranslationEngine
except ImportError:
    TranslationEngine = None

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Upper bound for uploaded audio accepted by /speech-to-text (10MB).
MAX_AUDIO_BYTES = 10 * 1024 * 1024


# --- Data Models ---
class TranslationRequest(BaseModel):
    """Request body for POST /translate."""

    text: str = Field(..., min_length=1, max_length=1000, description="Text to translate")
    target_lang: str = Field(..., description="Target language code")


class TTSRequest(BaseModel):
    """Request body for POST /text-to-speech."""

    text: str = Field(..., min_length=1, max_length=1000, description="Text to convert to speech")
    speaker: str = Field(default="p225", description="Speaker ID for TTS")


# --- App and AI Engine Initialization ---
app = FastAPI(
    title="Carsa AI API",
    description="Complete AI-powered translation and speech synthesis API",
    version="1.0.0"
)

# Engine instances; populated by load_models() at startup.
asr_engine = None
tts_engine = None
translation_engine = None


@app.on_event("startup")
def load_models():
    """Load all AI models into memory when the server starts.

    Every engine whose import succeeded is instantiated and stored in the
    matching module-level global; engines that are missing are skipped with
    a warning. Any exception raised while constructing an engine is logged
    and re-raised, so startup fails.
    """
    global asr_engine, tts_engine, translation_engine
    try:
        if TranslationEngine:
            logger.info("Loading Translation Engine...")
            translation_engine = TranslationEngine()
            logger.info("✅ Translation Engine loaded")
        else:
            logger.warning("⚠️ Translation Engine not available")

        if ASREngine:
            logger.info("Loading ASR Engine...")
            asr_engine = ASREngine()
            logger.info("✅ ASR Engine loaded")
        else:
            logger.warning("⚠️ ASR Engine not available")

        if TTS_AVAILABLE and TTSEngine:
            logger.info("Loading TTS Engine...")
            tts_engine = TTSEngine()
            logger.info("✅ TTS Engine loaded")
        else:
            logger.warning("⚠️ TTS Engine not available")

        logger.info("--- All available models loaded. API is ready. ---")
    except Exception as e:
        logger.error(f"Failed to load models: {e}")
        # Bare raise keeps the original traceback intact.
        raise


# --- API Endpoints ---
@app.get("/")
def read_root():
    """Return a static banner with the API status, version and service names."""
    return {
        "status": "Carsa AI API is running",
        "version": "1.0.0",
        "services": ["translation", "speech-to-text", "text-to-speech"]
    }


@app.get("/health")
def health_check():
    """Health check endpoint reporting which engines have been loaded."""
    return {
        "status": "healthy",
        "translation_engine": translation_engine is not None,
        "asr_engine": asr_engine is not None,
        "tts_engine": tts_engine is not None
    }


@app.post("/translate")
def translate_text(request: TranslationRequest):
    """Translate ``request.text`` into ``request.target_lang``.

    Raises:
        HTTPException: 503 when the translation engine is not loaded,
            400 when the target language is "hausa" (case-insensitive),
            500 when the engine raises while translating.
    """
    if not translation_engine:
        raise HTTPException(status_code=503, detail="Translation Engine not available.")

    # Block Hausa requests (discontinued)
    if request.target_lang.lower() == "hausa":
        raise HTTPException(
            status_code=400,
            detail="Hausa translation has been discontinued. Please use Twi, Ga, or Ewe instead."
        )

    try:
        result = translation_engine.translate(request.text, request.target_lang)
        return {"translated_text": result}
    except Exception as e:
        logger.error(f"Translation Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}") from e


@app.post("/speech-to-text")
async def speech_to_text(audio_file: UploadFile = File(...)):
    """
    Convert speech audio to text using ASR.
    Accepts audio file uploads in various formats (wav, mp3, m4a, etc.)

    Raises:
        HTTPException: 503 when the ASR engine is not loaded, 422 when the
            upload is missing, empty or larger than 10MB, 500 when
            transcription fails.
    """
    if not asr_engine:
        raise HTTPException(status_code=503, detail="ASR Engine not available.")

    # Validate file
    if not audio_file:
        raise HTTPException(status_code=422, detail="No audio file provided.")

    # The declared size may be None (or the attribute absent on older
    # Starlette releases); only pre-validate when it is actually known.
    declared_size = getattr(audio_file, "size", None)
    if declared_size is not None:
        if declared_size == 0:
            raise HTTPException(status_code=422, detail="Audio file is empty.")
        # Check file size (max 10MB)
        if declared_size > MAX_AUDIO_BYTES:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

    try:
        logger.info(f"Processing audio file: {audio_file.filename}, size: {declared_size} bytes, content_type: {audio_file.content_type}")
        audio_bytes = await audio_file.read()

        if len(audio_bytes) == 0:
            raise HTTPException(status_code=422, detail="Audio file contains no data.")
        # Enforce the limit on the real payload too, which also covers
        # uploads whose declared size was unknown.
        if len(audio_bytes) > MAX_AUDIO_BYTES:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

        # NOTE(review): transcribe() is called synchronously inside an async
        # handler, so it blocks the event loop while it runs. Consider
        # offloading to a worker thread once the engine's thread-safety is
        # confirmed.
        transcribed_text = asr_engine.transcribe(audio_bytes)
        logger.info(f"ASR transcription successful: {transcribed_text[:50]}...")
        return {"transcribed_text": transcribed_text}
    except HTTPException:
        # Validation errors raised above must keep their own status code.
        raise
    except Exception as e:
        logger.error(f"ASR Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}") from e


@app.get("/supported-languages")
def get_supported_languages():
    """Get list of supported translation languages.

    The list is taken from the keys of ``translation_engine.language_models``.
    When the engine is not loaded an empty list with a zero count is returned.
    """
    if not translation_engine:
        return {
            "supported_languages": [],
            "total_count": 0,
            "message": "Translation engine not available"
        }

    languages = list(translation_engine.language_models.keys())
    return {
        "supported_languages": languages,
        "total_count": len(languages),
        "message": "These are the currently supported languages for translation"
    }


@app.get("/tts/status")
def get_tts_status():
    """Get TTS engine status and information.

    Returns "unavailable" when the engine or its model is not loaded,
    "available" together with ``tts_engine.get_model_info()`` on success,
    and "error" (never an HTTP error) when fetching that info raises.
    """
    if not tts_engine or not tts_engine.model:
        return {
            "status": "unavailable",
            "message": "TTS engine is not loaded"
        }

    try:
        model_info = tts_engine.get_model_info()
        return {
            "status": "available",
            "model_info": model_info
        }
    except Exception as e:
        logger.error(f"Error getting TTS status: {e}")
        return {
            "status": "error",
            "message": f"Error getting TTS status: {str(e)}"
        }

@app.post("/text-to-speech")
async def text_to_speech(request: TTSRequest):
    """Synthesize ``request.text`` with voice ``request.speaker`` as WAV audio.

    Returns:
        StreamingResponse: the generated audio, served as ``audio/wav`` with
        an attachment filename of ``speech.wav``.

    Raises:
        HTTPException: 503 when the TTS library could not be imported or the
            engine/model is not loaded, 500 when synthesis raises or yields
            no audio data.
    """
    if not TTS_AVAILABLE:
        raise HTTPException(
            status_code=503,
            detail="TTS Engine not available. Requires Python 3.11 or lower to install TTS library."
        )

    if not tts_engine or not tts_engine.model:
        raise HTTPException(status_code=503, detail="TTS Engine not loaded.")

    try:
        # NOTE(review): synthesize_to_bytes() is called synchronously inside
        # an async handler, so it blocks the event loop while it runs.
        # Consider offloading to a worker thread once the engine's
        # thread-safety is confirmed.
        audio_bytes = tts_engine.synthesize_to_bytes(
            text=request.text,
            speaker=request.speaker
        )

        # An empty result would otherwise be served as a 200 response with a
        # zero-byte, unplayable "WAV" body; treat it as a synthesis failure.
        if audio_bytes is None or len(audio_bytes) == 0:
            raise ValueError("TTS engine returned no audio data")

        # Return audio as streaming response
        return StreamingResponse(
            io.BytesIO(audio_bytes),
            media_type="audio/wav",
            headers={
                "Content-Disposition": "attachment; filename=speech.wav",
                "Content-Length": str(len(audio_bytes))
            }
        )
    except Exception as e:
        logger.error(f"TTS Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to generate speech.") from e