"""Tacab ASR Somali API: a FastAPI service that transcribes uploaded audio.

The module loads the ``tacab/ASR_SOMALI`` speech-recognition pipeline once at
import time and exposes a single ``POST /transcribe`` endpoint.
"""

import contextlib
import os
import re
import tempfile
import traceback

import torchaudio
import uvicorn
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from transformers import pipeline

# Force torchaudio to use the soundfile backend.
# NOTE(review): set_audio_backend appears to be deprecated in newer torchaudio
# releases -- confirm against the pinned torchaudio version.
torchaudio.set_audio_backend("soundfile")

app = FastAPI(title="Tacab ASR Somali API")

# Load the ASR model once; every request reuses this pipeline (CPU, device=-1).
asr = pipeline(
    "automatic-speech-recognition",
    model="tacab/ASR_SOMALI",
    tokenizer="tacab/ASR_SOMALI",
    chunk_length_s=30,
    stride_length_s=6,
    return_timestamps="word",
    device=-1,
)

# Whitespace that follows a sentence terminator marks a sentence boundary.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.?!])\s+')
_SENTENCE_TERMINATORS = ".?!"
# Only short alphanumeric extensions are carried over from the client filename.
_SAFE_SUFFIX = re.compile(r"\.[A-Za-z0-9]{1,10}")


def auto_punctuate(text: str, words_per_sentence: int = 10) -> str:
    """Add naive sentence punctuation and capitalisation to raw ASR text.

    The text is cut into runs of ``words_per_sentence`` words. Each run is
    capitalised and terminated with a period unless it already ends with
    ``.``, ``?`` or ``!``. Any sentence boundaries already present inside a
    run are then capitalised as well.

    Args:
        text: Raw transcription text.
        words_per_sentence: Maximum number of words per generated sentence.

    Returns:
        The punctuated text, or an empty string when ``text`` has no words.

    Raises:
        ValueError: If ``words_per_sentence`` is smaller than 1.
    """
    if words_per_sentence < 1:
        raise ValueError("words_per_sentence must be at least 1")

    words = text.split()
    if not words:
        return ""

    segments = []
    for start in range(0, len(words), words_per_sentence):
        segment = " ".join(words[start:start + words_per_sentence]).capitalize()
        # Do not stack a period on top of an existing terminator ("?." / "..").
        if segment[-1] not in _SENTENCE_TERMINATORS:
            segment += "."
        segments.append(segment)

    # Sentences keep their own terminator after the split, so they are joined
    # with a plain space; joining with ". " would produce doubled periods.
    sentences = _SENTENCE_BOUNDARY.split(" ".join(segments))
    return " ".join(s.strip().capitalize() for s in sentences if s)


def _safe_suffix(filename):
    """Return a harmless file extension taken from an untrusted filename."""
    suffix = os.path.splitext(os.path.basename(filename or ""))[1]
    return suffix if _SAFE_SUFFIX.fullmatch(suffix) else ""


@app.post("/transcribe", tags=["ASR"], operation_id="generate")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return punctuated text.

    Responds with ``{"text": ...}`` on success, a 400 JSON error when the
    model produces no text, and a 500 JSON error when processing fails.
    """
    temp_path = None
    try:
        # The client-supplied filename is untrusted: never use it as a path.
        # A unique temp file also keeps concurrent uploads from colliding.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=_safe_suffix(file.filename)
        ) as tmp:
            temp_path = tmp.name
            tmp.write(await file.read())

        # NOTE(review): this call is synchronous and runs inside the async
        # handler, so other requests wait while a transcription is running.
        result = asr(temp_path)
        raw_text = result.get("text", "").strip()

        if not raw_text:
            return JSONResponse({"error": "No transcription result."}, status_code=400)

        return {"text": auto_punctuate(raw_text)}

    except Exception as e:
        traceback.print_exc()
        return JSONResponse({"error": str(e)}, status_code=500)

    finally:
        # Always remove the uploaded audio so the temp directory does not fill up.
        if temp_path is not None:
            with contextlib.suppress(OSError):
                os.remove(temp_path)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)