import os
import re
import traceback
import tempfile

from fastapi import FastAPI, UploadFile, File
from transformers import pipeline, Wav2Vec2Processor, Wav2Vec2ForCTC
import torch
import torchaudio

# ✅ Setup environment and backend: cache HF downloads under /tmp (writable in
# containerized hosts) and decode audio via the soundfile backend.
# NOTE(review): torchaudio.set_audio_backend is deprecated and removed in
# torchaudio >= 2.2 — confirm the pinned torchaudio version still provides it.
os.environ["HF_HOME"] = "/tmp"
torchaudio.set_audio_backend("soundfile")

app = FastAPI()

# ✅ Load the processor and model (kept as module-level names for importers;
# the endpoint below actually runs inference through the `asr` pipeline).
processor = Wav2Vec2Processor.from_pretrained("tacab/ASR_SOMALI")
model = Wav2Vec2ForCTC.from_pretrained("tacab/ASR_SOMALI")
model.to("cpu")

# ✅ Chunked ASR pipeline with word-level timestamps, forced to CPU (device=-1).
asr = pipeline(
    "automatic-speech-recognition",
    model="tacab/ASR_SOMALI",
    tokenizer="tacab/ASR_SOMALI",
    chunk_length_s=30,
    stride_length_s=6,
    return_timestamps="word",
    device=-1,
)


# ✅ Text auto-punctuation
def auto_punctuate(text):
    """Insert rough sentence breaks into raw ASR output.

    Splits the text into 10-word segments, terminates each with a period
    (unless it already ends in ``.``, ``?`` or ``!``), and capitalizes the
    first letter of every resulting sentence.
    """
    text = text.strip()

    def capitalize_sentences(text):
        sentences = re.split(r'(?<=[.?!])\s+', text)
        # Join with a single space: each sentence already carries its own
        # terminal punctuation (joining with '. ' produced double periods).
        return ' '.join(s.strip().capitalize() for s in sentences if s)

    if '.' not in text and len(text.split()) > 5:
        text += '.'

    words = text.split()
    segments = []
    for i in range(0, len(words), 10):
        segment = " ".join(words[i:i + 10]).strip().capitalize()
        # Only append a period when the chunk is not already punctuated,
        # so we never emit "word.." at a chunk boundary.
        if not segment.endswith(('.', '?', '!')):
            segment += '.'
        segments.append(segment)
    return capitalize_sentences(" ".join(segments))


@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to punctuated Somali text.

    Returns a dict with the cleaned text, the raw transcript, word-level
    timestamps, and a status message; errors are reported in-band in the
    ``message`` field rather than as an HTTP 5xx.
    """
    tmp_path = None
    try:
        # ✅ Save audio file to disk — the pipeline accepts a file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name

        # ✅ Run ASR
        result = asr(tmp_path)
        raw_text = result.get("text", "").strip()

        if not raw_text:
            return {"text": "", "message": "⚠️ Qoraal lama helin."}

        cleaned_text = auto_punctuate(raw_text)
        return {
            "text": cleaned_text,
            "raw": raw_text,
            "timestamps": result.get("chunks", []),
            "message": "✅ Turjumaad guul leh"
        }
    except Exception as e:
        traceback.print_exc()
        return {"text": "", "message": f"❌ Khalad dhacay: {str(e)}"}
    finally:
        # Fix: the temp file was previously leaked on every request —
        # always remove it, whether transcription succeeded or failed.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)