|
import os
import re
import tempfile
import traceback

import torchaudio
import uvicorn
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from transformers import pipeline
|
|
|
|
|
# Select the "soundfile" I/O backend when this torchaudio release still lets
# callers choose one. ``set_audio_backend`` is deprecated in torchaudio 2.x and
# is not guaranteed to exist in newer releases, so an unguarded call could abort
# the import of this module before the app is created.
_set_audio_backend = getattr(torchaudio, "set_audio_backend", None)
if _set_audio_backend is not None:
    _set_audio_backend("soundfile")

# ASGI application object that the route decorators in this module attach to.
app = FastAPI(title="Tacab ASR Somali API")

# Speech-recognition pipeline, built once at import time and shared by every
# request so the model is not reloaded per call.
asr = pipeline(
    "automatic-speech-recognition",
    model="tacab/ASR_SOMALI",
    tokenizer="tacab/ASR_SOMALI",
    # Chunked inference: audio is processed in 30 s windows with a 6 s stride
    # (see the Transformers ASR pipeline docs for the exact overlap semantics).
    chunk_length_s=30,
    stride_length_s=6,
    return_timestamps="word",
    # -1 selects the CPU in the Transformers pipeline API.
    device=-1,
)
|
|
|
|
|
def auto_punctuate(text, words_per_sentence=10):
    """Return *text* split into capitalised, period-terminated pseudo-sentences.

    This is a purely positional heuristic: the words of *text* are grouped
    into runs of ``words_per_sentence`` words and each run is treated as a
    sentence. Sentence terminators (``.``, ``?``, ``!``) already present
    inside a run are honoured as additional sentence boundaries.

    Every resulting sentence is passed through ``str.capitalize``, so its
    first character is upper-cased and the remainder is lower-cased.

    Args:
        text: Raw transcript. Leading/trailing and repeated whitespace is
            collapsed.
        words_per_sentence: Maximum number of words per generated sentence.
            Defaults to 10.

    Returns:
        The punctuated text, or ``""`` when *text* contains no words.

    Raises:
        ValueError: If ``words_per_sentence`` is smaller than 1.
    """
    if words_per_sentence < 1:
        raise ValueError("words_per_sentence must be at least 1")

    words = text.split()
    sentences = []
    for start in range(0, len(words), words_per_sentence):
        segment = " ".join(words[start:start + words_per_sentence])

        # Add a terminator only when the segment lacks one; appending
        # unconditionally is what used to produce ".." and "?." sequences.
        if not segment.endswith((".", "?", "!")):
            segment += "."

        # A segment may already contain complete sentences; capitalise each
        # of them separately. Every part keeps its own terminator.
        sentences.extend(
            part.capitalize()
            for part in re.split(r"(?<=[.?!])\s+", segment)
            if part
        )

    # Each sentence already ends with punctuation, so a plain space is the
    # only separator needed.
    return " ".join(sentences)
|
|
|
@app.post("/transcribe", tags=["ASR"], operation_id="generate")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return punctuated text.

    The upload is written to a uniquely named temporary file, passed to the
    module-level ``asr`` pipeline, and the resulting text is post-processed
    with ``auto_punctuate``. The temporary file is removed before returning,
    whether or not transcription succeeded.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        ``{"text": ...}`` on success; a ``JSONResponse`` with an ``error``
        key and status 400 when the pipeline yields no text, or status 500
        when any exception is raised while handling the request.

    NOTE: ``asr(...)`` is called synchronously inside this coroutine, so the
    event loop is occupied for the duration of the inference.
    """
    temp_path = None
    try:
        # The client-supplied filename is untrusted: never use it as a path.
        # Only its extension is kept (reduced to alphanumerics and dots) so
        # the temp file retains a recognisable suffix.
        extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
        suffix = "".join(ch for ch in extension if ch.isalnum() or ch == ".")

        # delete=False: the pipeline reopens the file by path after this
        # handle is closed; removal happens in the ``finally`` clause.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            temp_path = tmp.name
            tmp.write(await file.read())

        result = asr(temp_path)
        raw_text = result.get("text", "").strip()
        if not raw_text:
            return JSONResponse({"error": "No transcription result."}, status_code=400)

        return {"text": auto_punctuate(raw_text)}

    except Exception as e:
        # Top-level request boundary: log the traceback and report the
        # failure to the client instead of propagating.
        traceback.print_exc()
        return JSONResponse({"error": str(e)}, status_code=500)

    finally:
        # Always remove the upload so repeated requests do not fill the disk.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Cleanup is best-effort; log it but keep the response intact.
                traceback.print_exc()
|
|
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860, when this file is executed directly.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )
|
|