|
""" |
|
Created By: ishwor subedi |
|
Date: 2024-07-31 |
|
""" |
|
import os |
|
import tempfile |
|
from fastapi.responses import JSONResponse |
|
from fastapi import Form |
|
from fastapi import UploadFile, HTTPException, status |
|
from src.models.models import TextToSpeechRequest |
|
from fastapi.routing import APIRouter |
|
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline |
|
|
|
# Router that groups the speech endpoints under the "SpeechTranscription" tag.
speech_translator_router = APIRouter(
    tags=["SpeechTranscription"],
)

# Single pipeline instance created at import time and shared by every handler
# defined on this router.
pipeline = SpeechTranscriptionPipeline()
|
|
|
|
|
@speech_translator_router.post(
    "/text_to_speech",
    description="""
** For language refer below points**
**Supported Locales:**

- **English:**
    - **Australia:**
        - **Language:** en
        - **TLD:** com.au
    - **United Kingdom:**
        - **Language:** en
        - **TLD:** co.uk
    - **United States:**
        - **Language:** en
        - **TLD:** us
    - **Canada:**
        - **Language:** en
        - **TLD:** ca
    - **India:**
        - **Language:** en
        - **TLD:** co.in
    - **Ireland:**
        - **Language:** en
        - **TLD:** ie
    - **South Africa:**
        - **Language:** en
        - **TLD:** co.za
    - **Nigeria:**
        - **Language:** en
        - **TLD:** com.ng

- **French:**
    - **Canada:**
        - **Language:** fr
        - **TLD:** ca
    - **France:**
        - **Language:** fr
        - **TLD:** fr

- **Mandarin:**
    - **China Mainland:**
        - **Language:** zh-CN
        - **TLD:** any
    - **Taiwan:**
        - **Language:** zh-TW
        - **TLD:** any

- **Portuguese:**
    - **Brazil:**
        - **Language:** pt
        - **TLD:** com.br
    - **Portugal:**
        - **Language:** pt
        - **TLD:** pt

- **Spanish:**
    - **Mexico:**
        - **Language:** es
        - **TLD:** com.mx
    - **Spain:**
        - **Language:** es
        - **TLD:** es
    - **United States:**
        - **Language:** es
        - **TLD:** us
""",
)
async def text_to_speech(request: TextToSpeechRequest):
    """Generate speech audio for the text in ``request``.

    Passes ``request.text``, ``request.lang`` and ``request.tld`` to the
    shared pipeline and returns the result in a JSON body.

    Args:
        request: Payload providing the ``text``, ``lang`` and ``tld`` fields.

    Returns:
        A ``JSONResponse`` with keys ``"audio"`` (the pipeline result) and
        ``"status_code"`` (200).

    Raises:
        HTTPException: 400 when the pipeline raises ``ValueError`` or returns
            a falsy result; 500 for any other exception.
    """
    try:
        audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
        if not audio_bytes:
            # A falsy result is reported to the client as a 400 via the
            # ValueError handler below.
            raise ValueError("Audio generation failed.")
        # Built inside the ``try`` so that a failure while constructing the
        # response is also reported as a 500.
        # NOTE(review): presumably the pipeline returns a JSON-serialisable
        # value (e.g. base64 text) rather than raw bytes -- confirm.
        return JSONResponse(
            content={"audio": audio_bytes, "status_code": status.HTTP_200_OK},
            status_code=status.HTTP_200_OK,
        )
    except ValueError as ve:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(ve),
        ) from ve
    except Exception as e:
        # Keep the response generic, but chain the cause so it stays visible
        # in tracebacks instead of being discarded.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal Server Error",
        ) from e
|
|
|
|
|
@speech_translator_router.post(
    "/speech_to_text",
    description="""
** Specify the language used in the audio **
**Supported Languages:**

**Major Languages:**
- **English:** en
- **Mandarin Chinese:** zh
- **Spanish:** es
- **French:** fr
- **German:** de
- **Italian:** it
- **Japanese:** ja
- **Korean:** ko
- **Russian:** ru
- **Portuguese:** pt
- **Arabic:** ar

**Additional Languages:**

- **Indic Languages:**
    - **Hindi:** hi
    - **Bengali:** bn
    - **Tamil:** ta
    - **Telugu:** te

- **Southeast Asian Languages:**
    - **Vietnamese:** vi
    - **Thai:** th
    - **Indonesian:** id
    - **Malay:** ms

- **African Languages:**
    - **Swahili:** sw
    - **Yoruba:** yo
    - **Hausa:** ha

- **European Languages:**
    - **Polish:** pl
    - **Dutch:** nl
    - **Swedish:** sv
    - **Norwegian:** no
""",
)
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
    """Transcribe an uploaded audio file.

    The upload is written to a temporary ``.wav`` file, whose path is passed
    to the shared pipeline together with ``lang``. The temporary file is
    removed before the handler returns or raises.

    Args:
        audio: Uploaded audio file.
        lang: Language code submitted as a form field.

    Returns:
        A ``JSONResponse`` with keys ``"transcript"`` (the pipeline result)
        and ``"status_code"`` (200).

    Raises:
        HTTPException: 400 when the upload cannot be read or is empty; 500
            when the temporary file cannot be written or transcription fails;
            404 when the pipeline raises ``FileNotFoundError``.
    """
    try:
        audio_bytes = await audio.read()
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file",
        ) from exc

    if not audio_bytes:
        # An empty upload gets the same response as an unreadable one.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file",
        )

    temp_audio_file_path = None
    try:
        try:
            # delete=False keeps the file on disk after the handle is closed
            # so the pipeline can open it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
                # Record the path before writing so the outer ``finally``
                # can remove the file even if the write fails.
                temp_audio_file_path = temp_audio_file.name
                temp_audio_file.write(audio_bytes)
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Could not process audio file",
            ) from exc

        try:
            # NOTE(review): this call is not awaited, so it runs on the event
            # loop; if transcription is slow, consider a worker thread.
            transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
        except FileNotFoundError as exc:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Temporary file not found",
            ) from exc
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Error processing speech-to-text",
            ) from exc
    finally:
        # One cleanup point for both the write and the transcription steps.
        if temp_audio_file_path is not None and os.path.exists(temp_audio_file_path):
            os.remove(temp_audio_file_path)

    return JSONResponse(
        content={"transcript": transcript, "status_code": status.HTTP_200_OK},
        status_code=200,
    )
|
|
|
|