Spaces:

CarsaAI
/

carsa_api

Running

App Files Files Community

carsa_api / main.py

athmontech

Remove Hausa language support - model discontinued

130ce6d 2 months ago

raw

history blame contribute delete

7.78 kB

	# main.py - Final Production-Ready Backend

	from fastapi import FastAPI, UploadFile, File, HTTPException
	from pydantic import BaseModel, Field
	from starlette.responses import StreamingResponse
	import logging
	import io

	# Optional engine imports: when an engine module cannot be imported, its
	# class name is bound to None so the rest of the module can test for it.
	try:
	    from asr_engine import ASREngine
	except ImportError:
	    ASREngine = None

	try:
	    # The numba settings are written to the environment before tts_engine
	    # is imported (original note: prevents numba caching issues).
	    import os
	    os.environ.update({
	        "NUMBA_DISABLE_JIT": "1",
	        "NUMBA_CACHE_DIR": "/tmp/numba_cache",
	    })
	    from tts_engine import TTSEngine
	except ImportError as e:
	    print(f"⚠️ TTS engine not available: {e}")
	    TTSEngine = None
	    TTS_AVAILABLE = False
	else:
	    # Reached only when the tts_engine import succeeded.
	    TTS_AVAILABLE = True

	try:
	    from translation_engine import TranslationEngine
	except ImportError:
	    TranslationEngine = None

	# Logging: INFO-level root configuration and a module-scoped logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# --- Data Models ---
	class TranslationRequest(BaseModel):
	    # Body model accepted by translate_text (POST /translate).
	    # Documented with comments rather than a docstring so the generated
	    # schema description stays exactly as it was.

	    # Required text to translate; constrained to 1-1000 characters.
	    text: str = Field(
	        ...,
	        min_length=1,
	        max_length=1000,
	        description="Text to translate",
	    )
	    # Required target language; handed to the translation engine as given.
	    target_lang: str = Field(
	        ...,
	        description="Target language code",
	    )

	class TTSRequest(BaseModel):
	    # Body model accepted by text_to_speech (POST /text-to-speech).
	    # Documented with comments rather than a docstring so the generated
	    # schema description stays exactly as it was.

	    # Required text to synthesize; constrained to 1-1000 characters.
	    text: str = Field(
	        ...,
	        min_length=1,
	        max_length=1000,
	        description="Text to convert to speech",
	    )
	    # Optional speaker identifier; falls back to "p225" when omitted.
	    speaker: str = Field(
	        default="p225",
	        description="Speaker ID for TTS",
	    )

	# --- App and AI Engine Initialization ---
	# The FastAPI application; title, description and version are passed
	# straight to the FastAPI constructor.
	app = FastAPI(
	    title="Carsa AI API",
	    description="Complete AI-powered translation and speech synthesis API",
	    version="1.0.0",
	)

	# Engine singletons. They start as None and are assigned by load_models()
	# during application startup; endpoints check them before use.
	asr_engine = tts_engine = translation_engine = None

	@app.on_event("startup")
	def load_models():
	    """Load all AI models into memory when the server starts.

	    Registered as a FastAPI startup handler. For each engine class that was
	    imported successfully (TranslationEngine, ASREngine, TTSEngine) an
	    instance is created and stored in the matching module-level global.
	    Engines whose class is unavailable are skipped with a warning.

	    Raises:
	        Exception: any error raised while constructing an engine is logged
	            together with its traceback and then re-raised unchanged.
	    """
	    global asr_engine, tts_engine, translation_engine

	    try:
	        if TranslationEngine:
	            logger.info("Loading Translation Engine...")
	            translation_engine = TranslationEngine()
	            logger.info("✅ Translation Engine loaded")
	        else:
	            logger.warning("⚠️ Translation Engine not available")

	        if ASREngine:
	            logger.info("Loading ASR Engine...")
	            asr_engine = ASREngine()
	            logger.info("✅ ASR Engine loaded")
	        else:
	            logger.warning("⚠️ ASR Engine not available")

	        # TTS needs both the availability flag and the class itself.
	        if TTS_AVAILABLE and TTSEngine:
	            logger.info("Loading TTS Engine...")
	            tts_engine = TTSEngine()
	            logger.info("✅ TTS Engine loaded")
	        else:
	            logger.warning("⚠️ TTS Engine not available")

	        logger.info("--- All available models loaded. API is ready. ---")
	    except Exception as e:
	        # logger.exception keeps the same message but also records the
	        # traceback, which the previous logger.error call dropped.
	        logger.exception("Failed to load models: %s", e)
	        # Bare raise re-raises the active exception as-is.
	        raise

	# --- API Endpoints ---
	@app.get("/")
	def read_root():
	    # Root endpoint: returns a fixed banner with the API status string,
	    # version and the list of service names. (Comments instead of a
	    # docstring, so the published endpoint description is unchanged.)
	    service_names = ["translation", "speech-to-text", "text-to-speech"]
	    banner = {
	        "status": "Carsa AI API is running",
	        "version": "1.0.0",
	        "services": service_names,
	    }
	    return banner

	@app.get("/health")
	def health_check():
	    """Health check endpoint to verify all services are running."""
	    # Each flag only reports whether the corresponding engine global has
	    # been assigned; "status" is always the literal "healthy".
	    translation_ready = translation_engine is not None
	    asr_ready = asr_engine is not None
	    tts_ready = tts_engine is not None
	    return {
	        "status": "healthy",
	        "translation_engine": translation_ready,
	        "asr_engine": asr_ready,
	        "tts_engine": tts_ready,
	    }

	@app.post("/translate")
	def translate_text(request: TranslationRequest):
	    # Translate request.text into request.target_lang with the loaded
	    # translation engine and return {"translated_text": ...}.
	    # Responds 503 when no engine is loaded, 400 for the discontinued
	    # Hausa target, and 500 (carrying the error text) when the engine
	    # raises. (Comments instead of a docstring, so the published
	    # endpoint description is unchanged.)
	    if not translation_engine:
	        raise HTTPException(status_code=503, detail="Translation Engine not available.")

	    # Hausa is rejected up front, compared case-insensitively.
	    wants_hausa = request.target_lang.lower() == "hausa"
	    if wants_hausa:
	        raise HTTPException(
	            status_code=400,
	            detail="Hausa translation has been discontinued. Please use Twi, Ga, or Ewe instead."
	        )

	    try:
	        translated = translation_engine.translate(request.text, request.target_lang)
	    except Exception as e:
	        logger.error(f"Translation Error: {e}", exc_info=True)
	        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")

	    return {"translated_text": translated}

	@app.post("/speech-to-text")
	async def speech_to_text(audio_file: UploadFile = File(...)):
	    """
	    Convert speech audio to text using ASR.
	    Accepts audio file uploads in various formats (wav, mp3, m4a, etc.)
	    """
	    # Returns {"transcribed_text": ...}.
	    # Responds 503 when the ASR engine is not loaded, 422 for a missing,
	    # empty or oversized upload, and 500 (carrying the error text) when
	    # reading or transcription fails.
	    max_audio_bytes = 10 * 1024 * 1024  # upload limit: 10MB

	    if not asr_engine:
	        raise HTTPException(status_code=503, detail="ASR Engine not available.")

	    # Validate file
	    if not audio_file:
	        raise HTTPException(status_code=422, detail="No audio file provided.")

	    # UploadFile.size is optional: it can be None, and the attribute may
	    # not exist at all on older Starlette releases. Only pre-check the size
	    # when one was reported; the bytes actually read are checked below.
	    declared_size = getattr(audio_file, "size", None)
	    if declared_size is not None:
	        if declared_size == 0:
	            raise HTTPException(status_code=422, detail="Audio file is empty.")

	        # Check file size (max 10MB)
	        if declared_size > max_audio_bytes:
	            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

	    try:
	        logger.info(f"Processing audio file: {audio_file.filename}, size: {declared_size} bytes, content_type: {audio_file.content_type}")
	        audio_bytes = await audio_file.read()

	        if len(audio_bytes) == 0:
	            raise HTTPException(status_code=422, detail="Audio file contains no data.")

	        # Enforce the limit on the real payload too, so it still applies
	        # when no size was reported up front.
	        if len(audio_bytes) > max_audio_bytes:
	            raise HTTPException(status_code=422, detail="Audio file too large. Maximum size is 10MB.")

	        # NOTE(review): transcribe() is called synchronously inside an async
	        # handler, so the event loop is blocked while it runs. Consider
	        # offloading it to a worker thread once the engine is confirmed to
	        # be thread-safe.
	        transcribed_text = asr_engine.transcribe(audio_bytes)
	        logger.info(f"ASR transcription successful: {transcribed_text[:50]}...")
	        return {"transcribed_text": transcribed_text}

	    except HTTPException:
	        # Validation errors raised above pass through with their own status.
	        raise
	    except Exception as e:
	        logger.error(f"ASR Error: {e}", exc_info=True)
	        raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}")

	@app.get("/supported-languages")
	def get_supported_languages():
	    """Get list of supported translation languages."""
	    # Without a loaded engine the list is empty and no count is reported.
	    if not translation_engine:
	        return {
	            "supported_languages": [],
	            "message": "Translation engine not available"
	        }

	    # The supported languages are the keys of the engine's language_models.
	    models = translation_engine.language_models
	    language_names = list(models.keys())
	    return {
	        "supported_languages": language_names,
	        "total_count": len(models),
	        "message": "These are the currently supported languages for translation"
	    }

	@app.get("/tts/status")
	def get_tts_status():
	    """Get TTS engine status and information."""
	    # "unavailable" covers both a missing engine and an engine whose
	    # model attribute is falsy.
	    engine_ready = bool(tts_engine and tts_engine.model)
	    if not engine_ready:
	        return {
	            "status": "unavailable",
	            "message": "TTS engine is not loaded"
	        }

	    try:
	        details = tts_engine.get_model_info()
	    except Exception as e:
	        # Failures are reported in the response body instead of being raised.
	        logger.error(f"Error getting TTS status: {e}")
	        return {
	            "status": "error",
	            "message": f"Error getting TTS status: {str(e)}"
	        }

	    return {
	        "status": "available",
	        "model_info": details
	    }

	@app.post("/text-to-speech")
	async def text_to_speech(request: TTSRequest):
	    # Synthesize request.text with the requested speaker and return the
	    # audio as a downloadable "audio/wav" attachment named speech.wav.
	    # Responds 503 when the TTS library or engine is unavailable and 500
	    # when synthesis fails. (Comments instead of a docstring, so the
	    # published endpoint description is unchanged.)

	    # Imported locally because the module only imports StreamingResponse.
	    from starlette.responses import Response

	    if not TTS_AVAILABLE:
	        raise HTTPException(
	            status_code=503,
	            detail="TTS Engine not available. Requires Python 3.11 or lower to install TTS library."
	        )

	    if not tts_engine or not tts_engine.model:
	        raise HTTPException(status_code=503, detail="TTS Engine not loaded.")

	    try:
	        audio_bytes = tts_engine.synthesize_to_bytes(
	            text=request.text,
	            speaker=request.speaker
	        )

	        # The audio is already fully in memory, so send it as a plain
	        # Response. Wrapping it in StreamingResponse(io.BytesIO(...)) made
	        # the server iterate the buffer line by line, cutting the binary
	        # payload at every newline byte. Response also fills in
	        # Content-Length from the body. bytes() normalizes any bytes-like
	        # result without copying an object that is already bytes.
	        return Response(
	            content=bytes(audio_bytes),
	            media_type="audio/wav",
	            headers={
	                "Content-Disposition": "attachment; filename=speech.wav",
	            },
	        )
	    except Exception as e:
	        logger.error(f"TTS Error: {e}", exc_info=True)
	        raise HTTPException(status_code=500, detail="Failed to generate speech.")