|
|
|
|
|
|
|
|
# Standard library
import io
import logging

# Third-party
from fastapi import FastAPI, File, HTTPException, UploadFile
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
|
|
|
|
|
|
|
|
# The ASR engine is optional. When its module cannot be imported the name is
# bound to None so later code can test for availability instead of crashing
# at import time.
try:
    from asr_engine import ASREngine
except ImportError:
    ASREngine = None
|
|
|
|
|
# The TTS engine is optional. The Numba environment variables are written
# before `tts_engine` is imported; TTS_AVAILABLE records whether that import
# succeeded.
try:
    import os

    os.environ['NUMBA_DISABLE_JIT'] = '1'
    os.environ['NUMBA_CACHE_DIR'] = '/tmp/numba_cache'

    from tts_engine import TTSEngine
except ImportError as e:
    print(f"⚠️ TTS engine not available: {e}")
    TTSEngine = None
    TTS_AVAILABLE = False
else:
    TTS_AVAILABLE = True
|
|
|
|
|
# The translation engine is optional as well: a failed import leaves the name
# bound to None rather than aborting module import.
try:
    from translation_engine import TranslationEngine
except ImportError:
    TranslationEngine = None
|
|
|
|
|
|
|
|
# Configure the root logger at INFO level, then create this module's logger.
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
class TranslationRequest(BaseModel):
    # Request body for the translation endpoint.
    # Documented with comments rather than a docstring because pydantic
    # copies a model docstring into the generated schema description.

    # Text to translate; validated to be 1-1000 characters long.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to translate",
    )

    # Required identifier of the language to translate into.
    target_lang: str = Field(
        ...,
        description="Target language code",
    )
|
|
|
|
|
class TTSRequest(BaseModel):
    # Request body for the text-to-speech endpoint.
    # Documented with comments rather than a docstring because pydantic
    # copies a model docstring into the generated schema description.

    # Text to synthesize; validated to be 1-1000 characters long.
    text: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Text to convert to speech",
    )

    # Optional speaker identifier; "p225" is used when the client omits it.
    speaker: str = Field(
        default="p225",
        description="Speaker ID for TTS",
    )
|
|
|
|
|
|
|
|
# The FastAPI application object that all routes below are registered on.
app = FastAPI(
    title="Carsa AI API",
    description="Complete AI-powered translation and speech synthesis API",
    version="1.0.0",
)

# Engine singletons. They start out as None and are rebound by the startup
# handler; the endpoints treat a falsy value as "service unavailable".
asr_engine = tts_engine = translation_engine = None
|
|
|
|
|
@app.on_event("startup")
def load_models():
    """Load all AI models into memory when the server starts.

    Each engine whose class was imported successfully is constructed and
    stored in the matching module-level global (``translation_engine``,
    ``asr_engine``, ``tts_engine``). Engines whose import failed are skipped
    with a warning and their global stays ``None``.

    Raises:
        Exception: Any error raised while constructing an engine. It is
            logged together with its traceback and then re-raised unchanged.
    """
    global asr_engine, tts_engine, translation_engine

    try:
        if TranslationEngine:
            logger.info("Loading Translation Engine...")
            translation_engine = TranslationEngine()
            logger.info("✅ Translation Engine loaded")
        else:
            logger.warning("⚠️ Translation Engine not available")

        if ASREngine:
            logger.info("Loading ASR Engine...")
            asr_engine = ASREngine()
            logger.info("✅ ASR Engine loaded")
        else:
            logger.warning("⚠️ ASR Engine not available")

        if TTS_AVAILABLE and TTSEngine:
            logger.info("Loading TTS Engine...")
            tts_engine = TTSEngine()
            logger.info("✅ TTS Engine loaded")
        else:
            logger.warning("⚠️ TTS Engine not available")

        logger.info("--- All available models loaded. API is ready. ---")
    except Exception as e:
        # logger.exception records the traceback alongside the message, and a
        # bare `raise` re-raises the active exception without adding a frame.
        logger.exception("Failed to load models: %s", e)
        raise
|
|
|
|
|
|
|
|
@app.get("/")
def read_root():
    # Root endpoint: returns a static banner with the API version and the
    # names of the services this API offers.
    services = ["translation", "speech-to-text", "text-to-speech"]
    banner = {
        "status": "Carsa AI API is running",
        "version": "1.0.0",
        "services": services,
    }
    return banner
|
|
|
|
|
@app.get("/health")
def health_check():
    """Health check endpoint to verify all services are running."""
    # Map each reported key to the engine global it describes; an engine
    # counts as up when its global has been set to something other than None.
    engines = (
        ("translation_engine", translation_engine),
        ("asr_engine", asr_engine),
        ("tts_engine", tts_engine),
    )

    report = {"status": "healthy"}
    for key, engine in engines:
        report[key] = engine is not None
    return report
|
|
|
|
|
@app.post("/translate")
def translate_text(request: TranslationRequest):
    # Translate request.text into request.target_lang.
    #   503 - the translation engine is not loaded
    #   400 - Hausa was requested (compared case-insensitively)
    #   500 - the engine raised while translating
    engine_missing = not translation_engine
    if engine_missing:
        raise HTTPException(status_code=503, detail="Translation Engine not available.")

    wants_hausa = request.target_lang.lower() == "hausa"
    if wants_hausa:
        raise HTTPException(
            status_code=400,
            detail="Hausa translation has been discontinued. Please use Twi, Ga, or Ewe instead."
        )

    try:
        result = translation_engine.translate(request.text, request.target_lang)
    except Exception as e:
        logger.error(f"Translation Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
    else:
        return {"translated_text": result}
|
|
|
|
|
@app.post("/speech-to-text")
async def speech_to_text(audio_file: UploadFile = File(...)):
    """
    Convert speech audio to text using ASR.

    Accepts audio file uploads in various formats (wav, mp3, m4a, etc.)
    up to 10MB in size.
    """
    # Status codes produced here:
    #   503 - the ASR engine is not loaded
    #   422 - missing, empty or oversized upload
    #   500 - reading the upload or transcribing it raised
    if not asr_engine:
        raise HTTPException(status_code=503, detail="ASR Engine not available.")

    if not audio_file:
        raise HTTPException(status_code=422, detail="No audio file provided.")

    max_audio_bytes = 10 * 1024 * 1024
    too_large_detail = "Audio file too large. Maximum size is 10MB."

    # UploadFile.size is optional (None when unknown, and the attribute does
    # not exist on older Starlette releases). Only trust it when it is a real
    # number; comparing None with an int would raise TypeError.
    declared_size = getattr(audio_file, "size", None)
    if declared_size is not None:
        if declared_size == 0:
            raise HTTPException(status_code=422, detail="Audio file is empty.")
        if declared_size > max_audio_bytes:
            raise HTTPException(status_code=422, detail=too_large_detail)

    try:
        logger.info(
            "Processing audio file: %s, size: %s bytes, content_type: %s",
            audio_file.filename,
            declared_size,
            audio_file.content_type,
        )
        audio_bytes = await audio_file.read()

        if not audio_bytes:
            raise HTTPException(status_code=422, detail="Audio file contains no data.")

        # Enforce the limit on the bytes actually received, so it still holds
        # when no size was declared for the upload.
        if len(audio_bytes) > max_audio_bytes:
            raise HTTPException(status_code=422, detail=too_large_detail)

        # NOTE(review): transcribe() is called synchronously inside this
        # coroutine, so it runs on the event loop. Consider offloading it if
        # transcription is slow - confirm the engine is thread-safe first.
        transcribed_text = asr_engine.transcribe(audio_bytes)
        logger.info(f"ASR transcription successful: {transcribed_text[:50]}...")
        return {"transcribed_text": transcribed_text}

    except HTTPException:
        # Validation errors raised above already carry the right status code.
        raise
    except Exception as e:
        logger.error(f"ASR Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}") from e
|
|
|
|
|
@app.get("/supported-languages")
def get_supported_languages():
    """Get list of supported translation languages."""
    if translation_engine:
        # The engine's language_models mapping is keyed by language.
        language_models = translation_engine.language_models
        return {
            "supported_languages": list(language_models.keys()),
            "total_count": len(language_models),
            "message": "These are the currently supported languages for translation"
        }

    # No engine loaded: answer with an empty list instead of an error status.
    return {
        "supported_languages": [],
        "message": "Translation engine not available"
    }
|
|
|
|
|
@app.get("/tts/status")
def get_tts_status():
    """Get TTS engine status and information."""
    engine_ready = tts_engine and tts_engine.model
    if not engine_ready:
        return {
            "status": "unavailable",
            "message": "TTS engine is not loaded"
        }

    # Failures while querying the engine are reported in the response body
    # rather than raised to the client.
    try:
        model_info = tts_engine.get_model_info()
    except Exception as e:
        logger.error(f"Error getting TTS status: {e}")
        return {
            "status": "error",
            "message": f"Error getting TTS status: {str(e)}"
        }
    else:
        return {
            "status": "available",
            "model_info": model_info
        }
|
|
|
|
|
@app.post("/text-to-speech")
async def text_to_speech(request: TTSRequest):
    # Synthesize request.text with the requested speaker and return the audio
    # as a WAV attachment.
    #   503 - the TTS library could not be imported, or the engine/model is
    #         not loaded
    #   500 - synthesis or response construction raised
    if not TTS_AVAILABLE:
        raise HTTPException(
            status_code=503,
            detail="TTS Engine not available. Requires Python 3.11 or lower to install TTS library."
        )

    engine_ready = tts_engine and tts_engine.model
    if not engine_ready:
        raise HTTPException(status_code=503, detail="TTS Engine not loaded.")

    try:
        audio_bytes = tts_engine.synthesize_to_bytes(
            text=request.text,
            speaker=request.speaker
        )

        # Wrap the in-memory audio in a file-like object for streaming, and
        # advertise its exact length to the client.
        audio_stream = io.BytesIO(audio_bytes)
        response_headers = {
            "Content-Disposition": "attachment; filename=speech.wav",
            "Content-Length": str(len(audio_bytes))
        }
        return StreamingResponse(
            audio_stream,
            media_type="audio/wav",
            headers=response_headers
        )
    except Exception as e:
        logger.error(f"TTS Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to generate speech.")