carsa_api / main.py
athmontech's picture
Remove Hausa language support - model discontinued
130ce6d
# main.py - Final Production-Ready Backend
from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
import logging
import io
# Import our engine blueprints
# Every engine lives in its own module. When a module cannot be imported the
# matching class name is bound to None so the rest of this file can test for
# availability instead of failing at import time.
try:
    from asr_engine import ASREngine
except ImportError:
    ASREngine = None

# Set environment variables to prevent numba caching issues. They are put in
# place before tts_engine is imported, exactly as in the original ordering.
import os

os.environ.update(
    {
        'NUMBA_DISABLE_JIT': '1',
        'NUMBA_CACHE_DIR': '/tmp/numba_cache',
    }
)

try:
    from tts_engine import TTSEngine
except ImportError as tts_import_error:
    print(f"⚠️ TTS engine not available: {tts_import_error}")
    TTSEngine = None
    TTS_AVAILABLE = False
else:
    # Reached only when the import above succeeded.
    TTS_AVAILABLE = True

try:
    from translation_engine import TranslationEngine
except ImportError:
    TranslationEngine = None
# Configure logging
# basicConfig sets the root logger to INFO; `logger` is this module's named
# logger, used by the startup hook and the endpoint handlers in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# --- Data Models ---
# Request body accepted by POST /translate.
class TranslationRequest(BaseModel):
    # Text to be translated; validated to be 1-1000 characters long.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to translate",
    )
    # Required; handed to the translation engine as the target language.
    target_lang: str = Field(
        ...,
        description="Target language code",
    )
# Request body accepted by POST /text-to-speech.
class TTSRequest(BaseModel):
    # Text to be spoken; validated to be 1-1000 characters long.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to convert to speech",
    )
    # Optional speaker identifier; falls back to "p225" when omitted.
    speaker: str = Field(
        default="p225",
        description="Speaker ID for TTS",
    )
# --- App and AI Engine Initialization ---
# The FastAPI application object that all route decorators below attach to.
app = FastAPI(
    title="Carsa AI API",
    description="Complete AI-powered translation and speech synthesis API",
    version="1.0.0",
)

# Module-level engine handles. They start out as None and are assigned by the
# startup hook; endpoints treat None as "engine not available".
asr_engine = tts_engine = translation_engine = None
@app.on_event("startup")
def load_models():
    """Load all AI models into memory when the server starts.

    Each engine is instantiated only when its class was importable (and, for
    TTS, when TTS_AVAILABLE is set); otherwise a warning is logged and the
    corresponding module-level global is left untouched. Engines are loaded
    in the order translation, ASR, TTS.

    Raises:
        Exception: whatever an engine constructor raised. The error is
            logged with its traceback and then re-raised unchanged, so a
            failing engine aborts application startup.
    """
    global asr_engine, tts_engine, translation_engine

    try:
        if TranslationEngine:
            logger.info("Loading Translation Engine...")
            translation_engine = TranslationEngine()
            logger.info("✅ Translation Engine loaded")
        else:
            logger.warning("⚠️ Translation Engine not available")

        if ASREngine:
            logger.info("Loading ASR Engine...")
            asr_engine = ASREngine()
            logger.info("✅ ASR Engine loaded")
        else:
            logger.warning("⚠️ ASR Engine not available")

        if TTS_AVAILABLE and TTSEngine:
            logger.info("Loading TTS Engine...")
            tts_engine = TTSEngine()
            logger.info("✅ TTS Engine loaded")
        else:
            logger.warning("⚠️ TTS Engine not available")

        logger.info("--- All available models loaded. API is ready. ---")
    except Exception as e:
        # exc_info=True records the traceback, matching how the request
        # handlers in this file log their failures.
        logger.error(f"Failed to load models: {e}", exc_info=True)
        # Bare raise re-raises the active exception without adding this
        # frame to the traceback a second time.
        raise
# --- API Endpoints ---
# Root endpoint: returns a static banner with the API status string, the
# version and the list of service names.
@app.get("/")
def read_root():
    service_names = ["translation", "speech-to-text", "text-to-speech"]
    banner = {
        "status": "Carsa AI API is running",
        "version": "1.0.0",
        "services": service_names,
    }
    return banner
@app.get("/health")
def health_check():
    """Health check endpoint to verify all services are running."""
    # "status" is always reported as "healthy"; the per-engine flags say
    # whether each module-level engine handle has been populated.
    report = {"status": "healthy"}
    report["translation_engine"] = translation_engine is not None
    report["asr_engine"] = asr_engine is not None
    report["tts_engine"] = tts_engine is not None
    return report
@app.post("/translate")
def translate_text(request: TranslationRequest):
    """Translate text into the requested target language.

    Returns a JSON object with a single "translated_text" key.

    Error responses:
    - 503 when the translation engine is not loaded.
    - 400 when the target language is Hausa (discontinued).
    - 500 when the translation engine raises.
    """
    if not translation_engine:
        raise HTTPException(status_code=503, detail="Translation Engine not available.")

    # Block Hausa requests (discontinued). Surrounding whitespace is stripped
    # for the comparison so padded input such as " Hausa " is rejected too.
    if request.target_lang.strip().lower() == "hausa":
        raise HTTPException(
            status_code=400,
            detail="Hausa translation has been discontinued. Please use Twi, Ga, or Ewe instead."
        )

    try:
        result = translation_engine.translate(request.text, request.target_lang)
        return {"translated_text": result}
    except HTTPException:
        # Let deliberate HTTP errors through untouched, as speech_to_text does.
        raise
    except Exception as e:
        logger.error(f"Translation Error: {e}", exc_info=True)
        # Chain the cause so the original failure stays attached to the 500.
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}") from e
@app.post("/speech-to-text")
async def speech_to_text(audio_file: UploadFile = File(...)):
    """
    Convert speech audio to text using ASR.

    Accepts audio file uploads in various formats (wav, mp3, m4a, etc.)
    up to a maximum size of 10MB. Returns a JSON object with a single
    "transcribed_text" key.
    """
    # Upload size limit (10MB), applied both to the reported size and to the
    # bytes actually read.
    max_audio_bytes = 10 * 1024 * 1024

    if not asr_engine:
        raise HTTPException(status_code=503, detail="ASR Engine not available.")

    # Validate file
    if not audio_file:
        raise HTTPException(status_code=422, detail="No audio file provided.")

    # UploadFile.size is optional and is missing on older Starlette releases,
    # so it is only used for early rejection when it is actually known.
    # Comparing None against an int would otherwise raise TypeError here.
    reported_size = getattr(audio_file, "size", None)
    if reported_size is not None:
        if reported_size == 0:
            raise HTTPException(status_code=422, detail="Audio file is empty.")

        # Check file size (max 10MB)
        if reported_size > max_audio_bytes:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

    try:
        logger.info(f"Processing audio file: {audio_file.filename}, size: {reported_size} bytes, content_type: {audio_file.content_type}")

        audio_bytes = await audio_file.read()

        if len(audio_bytes) == 0:
            raise HTTPException(status_code=422, detail="Audio file contains no data.")

        # Enforce the limit on the real payload as well, which covers the
        # case where no size was reported up front.
        if len(audio_bytes) > max_audio_bytes:
            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

        # NOTE(review): transcribe() is called synchronously inside this
        # coroutine; confirm whether it should be moved off the event loop.
        transcribed_text = asr_engine.transcribe(audio_bytes)

        logger.info(f"ASR transcription successful: {transcribed_text[:50]}...")
        return {"transcribed_text": transcribed_text}

    except HTTPException:
        # Validation errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        logger.error(f"ASR Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}") from e
@app.get("/supported-languages")
def get_supported_languages():
    """Get list of supported translation languages."""
    if translation_engine:
        # The language names are the keys of the engine's language_models
        # mapping; total_count is the size of that same mapping.
        return {
            "supported_languages": list(translation_engine.language_models.keys()),
            "total_count": len(translation_engine.language_models),
            "message": "These are the currently supported languages for translation",
        }

    # Without an engine there is nothing to list (and no total_count key).
    return {
        "supported_languages": [],
        "message": "Translation engine not available",
    }
@app.get("/tts/status")
def get_tts_status():
    """Get TTS engine status and information."""
    # "unavailable" covers both a missing engine and an engine whose model
    # attribute is falsy.
    if not (tts_engine and tts_engine.model):
        return {
            "status": "unavailable",
            "message": "TTS engine is not loaded",
        }

    try:
        details = tts_engine.get_model_info()
    except Exception as e:
        # Failures are reported in the response body rather than raised.
        logger.error(f"Error getting TTS status: {e}")
        return {
            "status": "error",
            "message": f"Error getting TTS status: {str(e)}",
        }

    return {
        "status": "available",
        "model_info": details,
    }
@app.post("/text-to-speech")
async def text_to_speech(request: TTSRequest):
    """Synthesize speech for the given text and return it as a WAV download.

    Error responses:
    - 503 when the TTS library could not be imported or the engine/model is
      not loaded.
    - 500 when synthesis fails.
    """
    if not TTS_AVAILABLE:
        raise HTTPException(
            status_code=503,
            detail="TTS Engine not available. Requires Python 3.11 or lower to install TTS library."
        )

    if not tts_engine or not tts_engine.model:
        raise HTTPException(status_code=503, detail="TTS Engine not loaded.")

    try:
        # Use the new synthesize_to_bytes method
        audio_bytes = tts_engine.synthesize_to_bytes(
            text=request.text,
            speaker=request.speaker
        )

        # Normalise any bytes-like result to immutable bytes (no copy is made
        # when it already is a bytes object).
        payload = bytes(audio_bytes)

        # Iterating an io.BytesIO yields newline-delimited "lines", which
        # would split binary audio at every 0x0A byte into many tiny chunks.
        # A one-item iterator sends the whole clip as a single chunk.
        return StreamingResponse(
            iter((payload,)),
            media_type="audio/wav",
            headers={
                "Content-Disposition": "attachment; filename=speech.wav",
                "Content-Length": str(len(payload))
            }
        )

    except HTTPException:
        # Let deliberate HTTP errors through untouched, as speech_to_text does.
        raise
    except Exception as e:
        logger.error(f"TTS Error: {e}", exc_info=True)
        # Chain the cause so the original failure stays attached to the 500.
        raise HTTPException(status_code=500, detail="Failed to generate speech.") from e