# Spaces status (page header captured with this file): Runtime error
import os
import re
import traceback
import tempfile

# Hugging Face libraries resolve their cache directory when they are first
# imported, so HF_HOME must be set *before* `transformers` is imported below.
# Assigning it after the import (as this file previously did) is too late to
# move the cache location.
os.environ["HF_HOME"] = "/tmp"

from fastapi import FastAPI, UploadFile, File  # noqa: E402
from transformers import pipeline, Wav2Vec2Processor, Wav2Vec2ForCTC  # noqa: E402
import torch  # noqa: E402
import torchaudio  # noqa: E402

# Select the "soundfile" backend where torchaudio still exposes the global
# backend switch. The API is deprecated in recent torchaudio releases, so a
# missing attribute is tolerated instead of aborting start-up.
_set_audio_backend = getattr(torchaudio, "set_audio_backend", None)
if _set_audio_backend is not None:
    _set_audio_backend("soundfile")

# ASGI application object that the route handlers in this module attach to.
app = FastAPI()
# Load the processor and model
# Single source of truth for the Hub checkpoint used by every loader below.
_ASR_CHECKPOINT = "tacab/ASR_SOMALI"

# Stand-alone processor and CTC model, kept on the CPU.
# NOTE(review): neither `processor` nor `model` is referenced by the code
# visible in this file; the pipeline below is given the checkpoint name, not
# these objects. Confirm whether both are still needed.
processor = Wav2Vec2Processor.from_pretrained(_ASR_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_ASR_CHECKPOINT)
model.to("cpu")

# Options for the speech-recognition pipeline: 30 s chunks with a 6 s stride,
# word-level timestamps, and device -1 (CPU).
_ASR_PIPELINE_OPTIONS = {
    "model": _ASR_CHECKPOINT,
    "tokenizer": _ASR_CHECKPOINT,
    "chunk_length_s": 30,
    "stride_length_s": 6,
    "return_timestamps": "word",
    "device": -1,
}

# Pipeline used by the transcription endpoint.
asr = pipeline("automatic-speech-recognition", **_ASR_PIPELINE_OPTIONS)
# Text auto-punctuation
def auto_punctuate(text, words_per_sentence=10):
    """Add naive sentence punctuation and capitalisation to a transcript.

    The text is cut into consecutive groups of ``words_per_sentence`` words.
    Each group is terminated with a single period (unless it already ends in
    ``.``, ``?`` or ``!``) and every resulting sentence is capitalised with
    ``str.capitalize``, which upper-cases the first character and lower-cases
    the rest.

    Args:
        text: Raw transcript; surrounding and repeated whitespace is ignored.
        words_per_sentence: Maximum number of words per generated sentence.
            Defaults to 10.

    Returns:
        The punctuated text, or an empty string when ``text`` has no words.

    Raises:
        ValueError: If ``words_per_sentence`` is smaller than 1.
    """
    if words_per_sentence < 1:
        raise ValueError("words_per_sentence must be at least 1")

    terminators = (".", "?", "!")
    words = text.split()

    segments = []
    for start in range(0, len(words), words_per_sentence):
        segment = " ".join(words[start:start + words_per_sentence])
        # Terminate each segment exactly once; unconditionally appending a
        # period produced ".." whenever the segment already ended a sentence.
        if not segment.endswith(terminators):
            segment += "."
        segments.append(segment)

    # Re-split on terminators so punctuation that was already present inside
    # a segment also starts a new, capitalised sentence. The split keeps each
    # terminator attached, so sentences are joined with a plain space.
    sentences = re.split(r"(?<=[.?!])\s+", " ".join(segments))
    return " ".join(s.capitalize() for s in sentences if s)
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return punctuated text.

    The upload is written to a temporary ``.wav`` file, passed to the
    module-level ``asr`` pipeline, and the recognised text is post-processed
    with ``auto_punctuate``. The temporary file is removed before returning.

    NOTE(review): no route decorator is visible on this handler in the code
    shown here; confirm it is registered with ``app``.
    NOTE(review): the leading characters of the ``message`` strings look like
    mis-encoded emoji; confirm the file's encoding. They are left unchanged.

    Args:
        file: The uploaded audio file.

    Returns:
        A dict. On success it holds ``text`` (punctuated), ``raw`` (pipeline
        text), ``timestamps`` (the pipeline's ``chunks``) and ``message``.
        When nothing is recognised, or on any error, ``text`` is empty and
        ``message`` describes the outcome.
    """
    tmp_path = None
    try:
        # The pipeline is called with a filesystem path, so spool the upload
        # to disk. delete=False keeps the file after the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            # Record the path first so cleanup still runs if the write fails.
            tmp_path = tmp.name
            tmp.write(await file.read())

        result = asr(tmp_path)
        raw_text = result.get("text", "").strip()
        if not raw_text:
            return {"text": "", "message": "β οΈ Qoraal lama helin."}

        cleaned_text = auto_punctuate(raw_text)
        return {
            "text": cleaned_text,
            "raw": raw_text,
            "timestamps": result.get("chunks", []),
            "message": "β Turjumaad guul leh"
        }
    except Exception as e:
        # Top-level boundary: log the traceback and report the failure in the
        # response body instead of propagating it.
        traceback.print_exc()
        return {"text": "", "message": f"β Khalad dhacay: {str(e)}"}
    finally:
        # Every request used to leave its temporary file behind; remove it.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: log it, but keep the response intact.
                traceback.print_exc()